blob: ad2f2f2d3269c966ccf09b24ba7a7ea9c9c15ae3 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
14#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030015#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000017/* -------------------------------------------------------------------- */
18/* configuration */
19
/* Number of child slots embedded directly in ElementObjectExtra; an
   element with at most this many children needs no separate buffer. */
#define STATIC_CHILDREN 4
23
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
28
29/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010030 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000031 that the number of children should be an even number, at least on
32 32-bit platforms. */
33
34/* -------------------------------------------------------------------- */
35
/* Allocation tracing hooks.  Flip the condition to 1 to print a running
   byte count on every ALLOC/RELEASE; otherwise both expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
46
/* compiler tweaks: LOCAL(type) declares a file-local helper returning
   'type'; MSVC builds additionally request inlining and __fastcall. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
53
/* The text and tail slots of an element keep a 'join' flag in the lowest
   bit of the stored pointer.  Every use of those slots as an object
   pointer must go through JOIN_OBJ; see the comments in the
   ElementObject definition for more info.

   JOIN_OBJ(p)        - the object pointer with the flag bit masked off
   JOIN_GET(p)        - the flag bit (0 or 1)
   JOIN_SET(p, flag)  - the pointer of p combined with the given flag */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061
Eli Benderskydd3661e2013-09-13 06:24:25 -070062/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
63 * reference since this function sets it to NULL.
64*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020065static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070066{
67 if (*p) {
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = NULL;
70 Py_DECREF(tmp);
71 }
72}
73
Ronald Oussoren138d0802013-07-19 11:11:25 +020074/* Types defined by this extension */
75static PyTypeObject Element_Type;
76static PyTypeObject ElementIter_Type;
77static PyTypeObject TreeBuilder_Type;
78static PyTypeObject XMLParser_Type;
79
80
Eli Bendersky532d03e2013-08-10 08:00:39 -070081/* Per-module state; PEP 3121 */
82typedef struct {
83 PyObject *parseerror_obj;
84 PyObject *deepcopy_obj;
85 PyObject *elementpath_obj;
86} elementtreestate;
87
88static struct PyModuleDef elementtreemodule;
89
/* ET_STATE(mod): per-module state of the given module object, which is
 * assumed to be _elementtree.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* ET_STATE_GLOBAL: look up the module instance imported in the currently
 * running sub-interpreter and return its state.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
100
101static int
102elementtree_clear(PyObject *m)
103{
104 elementtreestate *st = ET_STATE(m);
105 Py_CLEAR(st->parseerror_obj);
106 Py_CLEAR(st->deepcopy_obj);
107 Py_CLEAR(st->elementpath_obj);
108 return 0;
109}
110
111static int
112elementtree_traverse(PyObject *m, visitproc visit, void *arg)
113{
114 elementtreestate *st = ET_STATE(m);
115 Py_VISIT(st->parseerror_obj);
116 Py_VISIT(st->deepcopy_obj);
117 Py_VISIT(st->elementpath_obj);
118 return 0;
119}
120
121static void
122elementtree_free(void *m)
123{
124 elementtree_clear((PyObject *)m);
125}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126
127/* helpers */
128
129LOCAL(PyObject*)
130deepcopy(PyObject* object, PyObject* memo)
131{
132 /* do a deep copy of the given object */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000133 PyObject* args;
134 PyObject* result;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700135 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136
Eli Bendersky532d03e2013-08-10 08:00:39 -0700137 if (!st->deepcopy_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138 PyErr_SetString(
139 PyExc_RuntimeError,
140 "deepcopy helper not found"
141 );
142 return NULL;
143 }
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000146 if (!args)
147 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700148 result = PyObject_CallObject(st->deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
153LOCAL(PyObject*)
154list_join(PyObject* list)
155{
156 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000158 PyObject* result;
159
Antoine Pitrouc1948842012-10-01 23:40:37 +0200160 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000161 if (!joiner)
162 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200163 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200165 if (result)
166 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000167 return result;
168}
169
Eli Bendersky48d358b2012-05-30 17:57:50 +0300170/* Is the given object an empty dictionary?
171*/
172static int
173is_empty_dict(PyObject *obj)
174{
175 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
176}
177
178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200180/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181
182typedef struct {
183
184 /* attributes (a dictionary object), or None if no attributes */
185 PyObject* attrib;
186
187 /* child elements */
188 int length; /* actual number of items */
189 int allocated; /* allocated items */
190
191 /* this either points to _children or to a malloced buffer */
192 PyObject* *children;
193
194 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100195
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000196} ElementObjectExtra;
197
198typedef struct {
199 PyObject_HEAD
200
201 /* element tag (a string). */
202 PyObject* tag;
203
204 /* text before first child. note that this is a tagged pointer;
205 use JOIN_OBJ to get the object pointer. the join flag is used
206 to distinguish lists created by the tree builder from lists
207 assigned to the attribute by application code; the former
208 should be joined before being returned to the user, the latter
209 should be left intact. */
210 PyObject* text;
211
212 /* text after this element, in parent. note that this is a tagged
213 pointer; use JOIN_OBJ to get the object pointer. */
214 PyObject* tail;
215
216 ElementObjectExtra* extra;
217
Eli Benderskyebf37a22012-04-03 22:02:37 +0300218 PyObject *weakreflist; /* For tp_weaklistoffset */
219
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000220} ElementObject;
221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222
/* True when op's type is exactly Element_Type (subclasses do not match). */
#define Element_CheckExact(op) (&Element_Type == Py_TYPE(op))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
225/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200226/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227
228LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
231 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200232 if (!self->extra) {
233 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000234 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200235 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000236
237 if (!attrib)
238 attrib = Py_None;
239
240 Py_INCREF(attrib);
241 self->extra->attrib = attrib;
242
243 self->extra->length = 0;
244 self->extra->allocated = STATIC_CHILDREN;
245 self->extra->children = self->extra->_children;
246
247 return 0;
248}
249
250LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200251dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000252{
Eli Bendersky08b85292012-04-04 15:55:07 +0300253 ElementObjectExtra *myextra;
254 int i;
255
Eli Benderskyebf37a22012-04-03 22:02:37 +0300256 if (!self->extra)
257 return;
258
259 /* Avoid DECREFs calling into this code again (cycles, etc.)
260 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300261 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300262 self->extra = NULL;
263
264 Py_DECREF(myextra->attrib);
265
Eli Benderskyebf37a22012-04-03 22:02:37 +0300266 for (i = 0; i < myextra->length; i++)
267 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268
Eli Benderskyebf37a22012-04-03 22:02:37 +0300269 if (myextra->children != myextra->_children)
270 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271
Eli Benderskyebf37a22012-04-03 22:02:37 +0300272 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273}
274
Eli Bendersky092af1f2012-03-04 07:14:03 +0200275/* Convenience internal function to create new Element objects with the given
276 * tag and attributes.
277*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000278LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200279create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280{
281 ElementObject* self;
282
Eli Bendersky0192ba32012-03-30 16:38:33 +0300283 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000284 if (self == NULL)
285 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 self->extra = NULL;
287
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 Py_INCREF(tag);
289 self->tag = tag;
290
291 Py_INCREF(Py_None);
292 self->text = Py_None;
293
294 Py_INCREF(Py_None);
295 self->tail = Py_None;
296
Eli Benderskyebf37a22012-04-03 22:02:37 +0300297 self->weakreflist = NULL;
298
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200299 ALLOC(sizeof(ElementObject), "create element");
300 PyObject_GC_Track(self);
301
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200302 if (attrib != Py_None && !is_empty_dict(attrib)) {
303 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200304 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200305 return NULL;
306 }
307 }
308
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000309 return (PyObject*) self;
310}
311
Eli Bendersky092af1f2012-03-04 07:14:03 +0200312static PyObject *
313element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
314{
315 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
316 if (e != NULL) {
317 Py_INCREF(Py_None);
318 e->tag = Py_None;
319
320 Py_INCREF(Py_None);
321 e->text = Py_None;
322
323 Py_INCREF(Py_None);
324 e->tail = Py_None;
325
326 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300327 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200328 }
329 return (PyObject *)e;
330}
331
Eli Bendersky737b1732012-05-29 06:02:56 +0300332/* Helper function for extracting the attrib dictionary from a keywords dict.
333 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800334 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300335 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700336 *
337 * Return a dictionary with the content of kwds merged into the content of
338 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300339 */
340static PyObject*
341get_attrib_from_keywords(PyObject *kwds)
342{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700343 PyObject *attrib_str = PyUnicode_FromString("attrib");
344 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300345
346 if (attrib) {
347 /* If attrib was found in kwds, copy its value and remove it from
348 * kwds
349 */
350 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700351 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300352 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
353 Py_TYPE(attrib)->tp_name);
354 return NULL;
355 }
356 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700357 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300358 } else {
359 attrib = PyDict_New();
360 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700361
362 Py_DECREF(attrib_str);
363
364 /* attrib can be NULL if PyDict_New failed */
365 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200366 if (PyDict_Update(attrib, kwds) < 0)
367 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300368 return attrib;
369}
370
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371static int
372element_init(PyObject *self, PyObject *args, PyObject *kwds)
373{
374 PyObject *tag;
375 PyObject *tmp;
376 PyObject *attrib = NULL;
377 ElementObject *self_elem;
378
379 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
380 return -1;
381
Eli Bendersky737b1732012-05-29 06:02:56 +0300382 if (attrib) {
383 /* attrib passed as positional arg */
384 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200385 if (!attrib)
386 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300387 if (kwds) {
388 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200389 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300390 return -1;
391 }
392 }
393 } else if (kwds) {
394 /* have keywords args */
395 attrib = get_attrib_from_keywords(kwds);
396 if (!attrib)
397 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200398 }
399
400 self_elem = (ElementObject *)self;
401
Antoine Pitrouc1948842012-10-01 23:40:37 +0200402 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 return -1;
406 }
407 }
408
Eli Bendersky48d358b2012-05-30 17:57:50 +0300409 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200410 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200411
412 /* Replace the objects already pointed to by tag, text and tail. */
413 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200414 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200415 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416 Py_DECREF(tmp);
417
418 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200419 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200420 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 Py_DECREF(JOIN_OBJ(tmp));
422
423 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(JOIN_OBJ(tmp));
427
428 return 0;
429}
430
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000431LOCAL(int)
Serhiy Storchaka097a6642015-11-25 20:12:37 +0200432element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000433{
Serhiy Storchaka097a6642015-11-25 20:12:37 +0200434 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000435 PyObject* *children;
436
437 /* make sure self->children can hold the given number of extra
438 elements. set an exception and return -1 if allocation failed */
439
Victor Stinner5f0af232013-07-11 23:01:36 +0200440 if (!self->extra) {
441 if (create_extra(self, NULL) < 0)
442 return -1;
443 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444
445 size = self->extra->length + extra;
446
447 if (size > self->extra->allocated) {
448 /* use Python 2.4's list growth strategy */
449 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000450 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100451 * which needs at least 4 bytes.
452 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000453 * be safe.
454 */
455 size = size ? size : 1;
Serhiy Storchaka097a6642015-11-25 20:12:37 +0200456 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
457 goto nomemory;
458 if (size > INT_MAX) {
459 PyErr_SetString(PyExc_OverflowError,
460 "too many children");
461 return -1;
462 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000463 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000464 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100465 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000466 * false alarm always assume at least one child to be safe.
467 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 children = PyObject_Realloc(self->extra->children,
469 size * sizeof(PyObject*));
470 if (!children)
471 goto nomemory;
472 } else {
473 children = PyObject_Malloc(size * sizeof(PyObject*));
474 if (!children)
475 goto nomemory;
476 /* copy existing children from static area to malloc buffer */
477 memcpy(children, self->extra->children,
478 self->extra->length * sizeof(PyObject*));
479 }
480 self->extra->children = children;
481 self->extra->allocated = size;
482 }
483
484 return 0;
485
486 nomemory:
487 PyErr_NoMemory();
488 return -1;
489}
490
491LOCAL(int)
492element_add_subelement(ElementObject* self, PyObject* element)
493{
494 /* add a child element to a parent */
495
496 if (element_resize(self, 1) < 0)
497 return -1;
498
499 Py_INCREF(element);
500 self->extra->children[self->extra->length] = element;
501
502 self->extra->length++;
503
504 return 0;
505}
506
507LOCAL(PyObject*)
508element_get_attrib(ElementObject* self)
509{
510 /* return borrowed reference to attrib dictionary */
511 /* note: this function assumes that the extra section exists */
512
513 PyObject* res = self->extra->attrib;
514
515 if (res == Py_None) {
516 /* create missing dictionary */
517 res = PyDict_New();
518 if (!res)
519 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200520 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000521 self->extra->attrib = res;
522 }
523
524 return res;
525}
526
527LOCAL(PyObject*)
528element_get_text(ElementObject* self)
529{
530 /* return borrowed reference to text attribute */
531
532 PyObject* res = self->text;
533
534 if (JOIN_GET(res)) {
535 res = JOIN_OBJ(res);
536 if (PyList_CheckExact(res)) {
537 res = list_join(res);
538 if (!res)
539 return NULL;
540 self->text = res;
541 }
542 }
543
544 return res;
545}
546
547LOCAL(PyObject*)
548element_get_tail(ElementObject* self)
549{
550 /* return borrowed reference to text attribute */
551
552 PyObject* res = self->tail;
553
554 if (JOIN_GET(res)) {
555 res = JOIN_OBJ(res);
556 if (PyList_CheckExact(res)) {
557 res = list_join(res);
558 if (!res)
559 return NULL;
560 self->tail = res;
561 }
562 }
563
564 return res;
565}
566
567static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300568subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000569{
570 PyObject* elem;
571
572 ElementObject* parent;
573 PyObject* tag;
574 PyObject* attrib = NULL;
575 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
576 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800577 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000578 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800579 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000580
Eli Bendersky737b1732012-05-29 06:02:56 +0300581 if (attrib) {
582 /* attrib passed as positional arg */
583 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000584 if (!attrib)
585 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300586 if (kwds) {
587 if (PyDict_Update(attrib, kwds) < 0) {
588 return NULL;
589 }
590 }
591 } else if (kwds) {
592 /* have keyword args */
593 attrib = get_attrib_from_keywords(kwds);
594 if (!attrib)
595 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000596 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300597 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000598 Py_INCREF(Py_None);
599 attrib = Py_None;
600 }
601
Eli Bendersky092af1f2012-03-04 07:14:03 +0200602 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000603 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200604 if (elem == NULL)
605 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000606
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000607 if (element_add_subelement(parent, elem) < 0) {
608 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000609 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000610 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000611
612 return elem;
613}
614
Eli Bendersky0192ba32012-03-30 16:38:33 +0300615static int
616element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
617{
618 Py_VISIT(self->tag);
619 Py_VISIT(JOIN_OBJ(self->text));
620 Py_VISIT(JOIN_OBJ(self->tail));
621
622 if (self->extra) {
623 int i;
624 Py_VISIT(self->extra->attrib);
625
626 for (i = 0; i < self->extra->length; ++i)
627 Py_VISIT(self->extra->children[i]);
628 }
629 return 0;
630}
631
632static int
633element_gc_clear(ElementObject *self)
634{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300635 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700636 _clear_joined_ptr(&self->text);
637 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300638
639 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300640 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300641 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300642 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300643 return 0;
644}
645
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000646static void
647element_dealloc(ElementObject* self)
648{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300649 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300650
651 if (self->weakreflist != NULL)
652 PyObject_ClearWeakRefs((PyObject *) self);
653
Eli Bendersky0192ba32012-03-30 16:38:33 +0300654 /* element_gc_clear clears all references and deallocates extra
655 */
656 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000657
658 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200659 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000660}
661
662/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663
664static PyObject*
665element_append(ElementObject* self, PyObject* args)
666{
667 PyObject* element;
668 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
669 return NULL;
670
671 if (element_add_subelement(self, element) < 0)
672 return NULL;
673
674 Py_RETURN_NONE;
675}
676
677static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300678element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000679{
680 if (!PyArg_ParseTuple(args, ":clear"))
681 return NULL;
682
Eli Benderskyebf37a22012-04-03 22:02:37 +0300683 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000684
685 Py_INCREF(Py_None);
686 Py_DECREF(JOIN_OBJ(self->text));
687 self->text = Py_None;
688
689 Py_INCREF(Py_None);
690 Py_DECREF(JOIN_OBJ(self->tail));
691 self->tail = Py_None;
692
693 Py_RETURN_NONE;
694}
695
696static PyObject*
697element_copy(ElementObject* self, PyObject* args)
698{
699 int i;
700 ElementObject* element;
701
702 if (!PyArg_ParseTuple(args, ":__copy__"))
703 return NULL;
704
Eli Bendersky092af1f2012-03-04 07:14:03 +0200705 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800706 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000707 if (!element)
708 return NULL;
709
710 Py_DECREF(JOIN_OBJ(element->text));
711 element->text = self->text;
712 Py_INCREF(JOIN_OBJ(element->text));
713
714 Py_DECREF(JOIN_OBJ(element->tail));
715 element->tail = self->tail;
716 Py_INCREF(JOIN_OBJ(element->tail));
717
718 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000719 if (element_resize(element, self->extra->length) < 0) {
720 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000721 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000722 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723
724 for (i = 0; i < self->extra->length; i++) {
725 Py_INCREF(self->extra->children[i]);
726 element->extra->children[i] = self->extra->children[i];
727 }
728
729 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000730 }
731
732 return (PyObject*) element;
733}
734
735static PyObject*
736element_deepcopy(ElementObject* self, PyObject* args)
737{
738 int i;
739 ElementObject* element;
740 PyObject* tag;
741 PyObject* attrib;
742 PyObject* text;
743 PyObject* tail;
744 PyObject* id;
745
746 PyObject* memo;
747 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
748 return NULL;
749
750 tag = deepcopy(self->tag, memo);
751 if (!tag)
752 return NULL;
753
754 if (self->extra) {
755 attrib = deepcopy(self->extra->attrib, memo);
756 if (!attrib) {
757 Py_DECREF(tag);
758 return NULL;
759 }
760 } else {
761 Py_INCREF(Py_None);
762 attrib = Py_None;
763 }
764
Eli Bendersky092af1f2012-03-04 07:14:03 +0200765 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000766
767 Py_DECREF(tag);
768 Py_DECREF(attrib);
769
770 if (!element)
771 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100772
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000773 text = deepcopy(JOIN_OBJ(self->text), memo);
774 if (!text)
775 goto error;
776 Py_DECREF(element->text);
777 element->text = JOIN_SET(text, JOIN_GET(self->text));
778
779 tail = deepcopy(JOIN_OBJ(self->tail), memo);
780 if (!tail)
781 goto error;
782 Py_DECREF(element->tail);
783 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
784
785 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000786 if (element_resize(element, self->extra->length) < 0)
787 goto error;
788
789 for (i = 0; i < self->extra->length; i++) {
790 PyObject* child = deepcopy(self->extra->children[i], memo);
791 if (!child) {
792 element->extra->length = i;
793 goto error;
794 }
795 element->extra->children[i] = child;
796 }
797
798 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000799 }
800
801 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200802 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000803 if (!id)
804 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805
806 i = PyDict_SetItem(memo, id, (PyObject*) element);
807
808 Py_DECREF(id);
809
810 if (i < 0)
811 goto error;
812
813 return (PyObject*) element;
814
815 error:
816 Py_DECREF(element);
817 return NULL;
818}
819
Martin v. Löwisbce16662012-06-17 10:41:22 +0200820static PyObject*
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200821element_sizeof(PyObject* myself, PyObject* args)
Martin v. Löwisbce16662012-06-17 10:41:22 +0200822{
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200823 ElementObject *self = (ElementObject*)myself;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200824 Py_ssize_t result = sizeof(ElementObject);
825 if (self->extra) {
826 result += sizeof(ElementObjectExtra);
827 if (self->extra->children != self->extra->_children)
828 result += sizeof(PyObject*) * self->extra->allocated;
829 }
830 return PyLong_FromSsize_t(result);
831}
832
/* Keys of the state dictionary exchanged by getstate/setstate. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
839
840/* __getstate__ returns a fabricated instance dict as in the pure-Python
841 * Element implementation, for interoperability/interchangeability. This
842 * makes the pure-Python implementation details an API, but (a) there aren't
843 * any unnecessary structures there; and (b) it buys compatibility with 3.2
844 * pickles. See issue #16076.
845 */
846static PyObject *
847element_getstate(ElementObject *self)
848{
849 int i, noattrib;
850 PyObject *instancedict = NULL, *children;
851
852 /* Build a list of children. */
853 children = PyList_New(self->extra ? self->extra->length : 0);
854 if (!children)
855 return NULL;
856 for (i = 0; i < PyList_GET_SIZE(children); i++) {
857 PyObject *child = self->extra->children[i];
858 Py_INCREF(child);
859 PyList_SET_ITEM(children, i, child);
860 }
861
862 /* Construct the state object. */
863 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
864 if (noattrib)
865 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
866 PICKLED_TAG, self->tag,
867 PICKLED_CHILDREN, children,
868 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700869 PICKLED_TEXT, JOIN_OBJ(self->text),
870 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800871 else
872 instancedict = Py_BuildValue("{sOsOsOsOsO}",
873 PICKLED_TAG, self->tag,
874 PICKLED_CHILDREN, children,
875 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700876 PICKLED_TEXT, JOIN_OBJ(self->text),
877 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800878 if (instancedict) {
879 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800880 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800881 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800882 else {
883 for (i = 0; i < PyList_GET_SIZE(children); i++)
884 Py_DECREF(PyList_GET_ITEM(children, i));
885 Py_DECREF(children);
886
887 return NULL;
888 }
889}
890
891static PyObject *
892element_setstate_from_attributes(ElementObject *self,
893 PyObject *tag,
894 PyObject *attrib,
895 PyObject *text,
896 PyObject *tail,
897 PyObject *children)
898{
Serhiy Storchaka097a6642015-11-25 20:12:37 +0200899 int i, nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800900
901 if (!tag) {
902 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
903 return NULL;
904 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800905
906 Py_CLEAR(self->tag);
907 self->tag = tag;
908 Py_INCREF(self->tag);
909
Eli Benderskydd3661e2013-09-13 06:24:25 -0700910 _clear_joined_ptr(&self->text);
911 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
912 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800913
Eli Benderskydd3661e2013-09-13 06:24:25 -0700914 _clear_joined_ptr(&self->tail);
915 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
916 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800917
918 /* Handle ATTRIB and CHILDREN. */
919 if (!children && !attrib)
920 Py_RETURN_NONE;
921
922 /* Compute 'nchildren'. */
923 if (children) {
Serhiy Storchaka097a6642015-11-25 20:12:37 +0200924 Py_ssize_t size;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800925 if (!PyList_Check(children)) {
926 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
927 return NULL;
928 }
Serhiy Storchaka097a6642015-11-25 20:12:37 +0200929 size = PyList_Size(children);
930 /* expat limits nchildren to int */
931 if (size > INT_MAX) {
932 PyErr_SetString(PyExc_OverflowError, "too many children");
933 return NULL;
934 }
935 nchildren = (int)size;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800936 }
937 else {
938 nchildren = 0;
939 }
940
941 /* Allocate 'extra'. */
942 if (element_resize(self, nchildren)) {
943 return NULL;
944 }
945 assert(self->extra && self->extra->allocated >= nchildren);
946
947 /* Copy children */
948 for (i = 0; i < nchildren; i++) {
949 self->extra->children[i] = PyList_GET_ITEM(children, i);
950 Py_INCREF(self->extra->children[i]);
951 }
952
953 self->extra->length = nchildren;
954 self->extra->allocated = nchildren;
955
956 /* Stash attrib. */
957 if (attrib) {
958 Py_CLEAR(self->extra->attrib);
959 self->extra->attrib = attrib;
960 Py_INCREF(attrib);
961 }
962
963 Py_RETURN_NONE;
964}
965
966/* __setstate__ for Element instance from the Python implementation.
967 * 'state' should be the instance dict.
968 */
969static PyObject *
970element_setstate_from_Python(ElementObject *self, PyObject *state)
971{
972 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
973 PICKLED_TAIL, PICKLED_CHILDREN, 0};
974 PyObject *args;
975 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800976 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800977
Eli Bendersky698bdb22013-01-10 06:01:06 -0800978 tag = attrib = text = tail = children = NULL;
979 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800980 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800981 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800982
983 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
984 &attrib, &text, &tail, &children))
985 retval = element_setstate_from_attributes(self, tag, attrib, text,
986 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800987 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800988 retval = NULL;
989
990 Py_DECREF(args);
991 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800992}
993
994static PyObject *
995element_setstate(ElementObject *self, PyObject *state)
996{
997 if (!PyDict_CheckExact(state)) {
998 PyErr_Format(PyExc_TypeError,
999 "Don't know how to unpickle \"%.200R\" as an Element",
1000 state);
1001 return NULL;
1002 }
1003 else
1004 return element_setstate_from_Python(self, state);
1005}
1006
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001007LOCAL(int)
1008checkpath(PyObject* tag)
1009{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001010 Py_ssize_t i;
1011 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001012
1013 /* check if a tag contains an xpath character */
1014
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001015#define PATHCHAR(ch) \
1016 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001017
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001018 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001019 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1020 void *data = PyUnicode_DATA(tag);
1021 unsigned int kind = PyUnicode_KIND(tag);
1022 for (i = 0; i < len; i++) {
1023 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1024 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001025 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001026 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001027 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001028 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001029 return 1;
1030 }
1031 return 0;
1032 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001033 if (PyBytes_Check(tag)) {
1034 char *p = PyBytes_AS_STRING(tag);
1035 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001036 if (p[i] == '{')
1037 check = 0;
1038 else if (p[i] == '}')
1039 check = 1;
1040 else if (check && PATHCHAR(p[i]))
1041 return 1;
1042 }
1043 return 0;
1044 }
1045
1046 return 1; /* unknown type; might be path expression */
1047}
1048
1049static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001050element_extend(ElementObject* self, PyObject* args)
1051{
1052 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001053 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001054
1055 PyObject* seq_in;
1056 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1057 return NULL;
1058
1059 seq = PySequence_Fast(seq_in, "");
1060 if (!seq) {
1061 PyErr_Format(
1062 PyExc_TypeError,
1063 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1064 );
1065 return NULL;
1066 }
1067
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001068 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001069 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001070 Py_INCREF(element);
1071 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001072 PyErr_Format(
1073 PyExc_TypeError,
1074 "expected an Element, not \"%.200s\"",
1075 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001076 Py_DECREF(seq);
1077 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001078 return NULL;
1079 }
1080
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001081 if (element_add_subelement(self, element) < 0) {
1082 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001083 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001084 return NULL;
1085 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001086 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001087 }
1088
1089 Py_DECREF(seq);
1090
1091 Py_RETURN_NONE;
1092}
1093
1094static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001095element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001096{
1097 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001098 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001099 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001100 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001101 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001102
Eli Bendersky737b1732012-05-29 06:02:56 +03001103 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1104 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001105 return NULL;
1106
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001107 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001108 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001109 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001110 st->elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001111 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001112 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001113
1114 if (!self->extra)
1115 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001116
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001117 for (i = 0; i < self->extra->length; i++) {
1118 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001119 int rc;
1120 if (!Element_CheckExact(item))
1121 continue;
1122 Py_INCREF(item);
1123 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1124 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001125 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001126 Py_DECREF(item);
1127 if (rc < 0)
1128 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001129 }
1130
1131 Py_RETURN_NONE;
1132}
1133
1134static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001135element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001136{
1137 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001138 PyObject* tag;
1139 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001140 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001141 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001142 static char *kwlist[] = {"path", "default", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001143 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001144
Eli Bendersky737b1732012-05-29 06:02:56 +03001145 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1146 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147 return NULL;
1148
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001149 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001150 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001151 st->elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001152 );
1153
1154 if (!self->extra) {
1155 Py_INCREF(default_value);
1156 return default_value;
1157 }
1158
1159 for (i = 0; i < self->extra->length; i++) {
1160 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001161 int rc;
1162 if (!Element_CheckExact(item))
1163 continue;
1164 Py_INCREF(item);
1165 rc = PyObject_RichCompareBool(item->tag, tag, Py_EQ);
1166 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001167 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001168 if (text == Py_None) {
1169 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001170 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001171 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001172 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001173 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001174 return text;
1175 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001176 Py_DECREF(item);
1177 if (rc < 0)
1178 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001179 }
1180
1181 Py_INCREF(default_value);
1182 return default_value;
1183}
1184
1185static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001186element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001187{
1188 int i;
1189 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001190 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001191 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001192 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001193 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001194
Eli Bendersky737b1732012-05-29 06:02:56 +03001195 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1196 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001197 return NULL;
1198
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001199 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001200 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001201 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001202 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001203 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001204 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001205
1206 out = PyList_New(0);
1207 if (!out)
1208 return NULL;
1209
1210 if (!self->extra)
1211 return out;
1212
1213 for (i = 0; i < self->extra->length; i++) {
1214 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001215 int rc;
1216 if (!Element_CheckExact(item))
1217 continue;
1218 Py_INCREF(item);
1219 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1220 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1221 Py_DECREF(item);
1222 Py_DECREF(out);
1223 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001224 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001225 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226 }
1227
1228 return out;
1229}
1230
1231static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001232element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001233{
1234 PyObject* tag;
1235 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001236 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001237 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001238 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001239
Eli Bendersky737b1732012-05-29 06:02:56 +03001240 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
Eli Bendersky163d7f02013-11-24 06:55:04 -08001241 &tag, &namespaces)) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001242 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -08001243 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001244
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001245 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001246 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001247}
1248
1249static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001250element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001251{
1252 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001253 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254
1255 PyObject* key;
1256 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001257
1258 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1259 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001260 return NULL;
1261
1262 if (!self->extra || self->extra->attrib == Py_None)
1263 value = default_value;
1264 else {
1265 value = PyDict_GetItem(self->extra->attrib, key);
1266 if (!value)
1267 value = default_value;
1268 }
1269
1270 Py_INCREF(value);
1271 return value;
1272}
1273
1274static PyObject*
1275element_getchildren(ElementObject* self, PyObject* args)
1276{
1277 int i;
1278 PyObject* list;
1279
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001280 /* FIXME: report as deprecated? */
1281
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282 if (!PyArg_ParseTuple(args, ":getchildren"))
1283 return NULL;
1284
1285 if (!self->extra)
1286 return PyList_New(0);
1287
1288 list = PyList_New(self->extra->length);
1289 if (!list)
1290 return NULL;
1291
1292 for (i = 0; i < self->extra->length; i++) {
1293 PyObject* item = self->extra->children[i];
1294 Py_INCREF(item);
1295 PyList_SET_ITEM(list, i, item);
1296 }
1297
1298 return list;
1299}
1300
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001301
Eli Bendersky64d11e62012-06-15 07:42:50 +03001302static PyObject *
1303create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1304
1305
1306static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001307element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001308{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001309 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001310 static char* kwlist[] = {"tag", 0};
1311
1312 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001313 return NULL;
1314
Eli Bendersky64d11e62012-06-15 07:42:50 +03001315 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001316}
1317
1318
1319static PyObject*
1320element_itertext(ElementObject* self, PyObject* args)
1321{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001322 if (!PyArg_ParseTuple(args, ":itertext"))
1323 return NULL;
1324
Eli Bendersky64d11e62012-06-15 07:42:50 +03001325 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001326}
1327
Eli Bendersky64d11e62012-06-15 07:42:50 +03001328
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001329static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001330element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001331{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001332 ElementObject* self = (ElementObject*) self_;
1333
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001334 if (!self->extra || index < 0 || index >= self->extra->length) {
1335 PyErr_SetString(
1336 PyExc_IndexError,
1337 "child index out of range"
1338 );
1339 return NULL;
1340 }
1341
1342 Py_INCREF(self->extra->children[index]);
1343 return self->extra->children[index];
1344}
1345
1346static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001347element_insert(ElementObject* self, PyObject* args)
1348{
1349 int i;
1350
1351 int index;
1352 PyObject* element;
1353 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1354 &Element_Type, &element))
1355 return NULL;
1356
Victor Stinner5f0af232013-07-11 23:01:36 +02001357 if (!self->extra) {
1358 if (create_extra(self, NULL) < 0)
1359 return NULL;
1360 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001361
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001362 if (index < 0) {
1363 index += self->extra->length;
1364 if (index < 0)
1365 index = 0;
1366 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001367 if (index > self->extra->length)
1368 index = self->extra->length;
1369
1370 if (element_resize(self, 1) < 0)
1371 return NULL;
1372
1373 for (i = self->extra->length; i > index; i--)
1374 self->extra->children[i] = self->extra->children[i-1];
1375
1376 Py_INCREF(element);
1377 self->extra->children[index] = element;
1378
1379 self->extra->length++;
1380
1381 Py_RETURN_NONE;
1382}
1383
1384static PyObject*
1385element_items(ElementObject* self, PyObject* args)
1386{
1387 if (!PyArg_ParseTuple(args, ":items"))
1388 return NULL;
1389
1390 if (!self->extra || self->extra->attrib == Py_None)
1391 return PyList_New(0);
1392
1393 return PyDict_Items(self->extra->attrib);
1394}
1395
1396static PyObject*
1397element_keys(ElementObject* self, PyObject* args)
1398{
1399 if (!PyArg_ParseTuple(args, ":keys"))
1400 return NULL;
1401
1402 if (!self->extra || self->extra->attrib == Py_None)
1403 return PyList_New(0);
1404
1405 return PyDict_Keys(self->extra->attrib);
1406}
1407
Martin v. Löwis18e16552006-02-15 17:27:45 +00001408static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001409element_length(ElementObject* self)
1410{
1411 if (!self->extra)
1412 return 0;
1413
1414 return self->extra->length;
1415}
1416
1417static PyObject*
1418element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1419{
1420 PyObject* elem;
1421
1422 PyObject* tag;
1423 PyObject* attrib;
1424 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1425 return NULL;
1426
1427 attrib = PyDict_Copy(attrib);
1428 if (!attrib)
1429 return NULL;
1430
Eli Bendersky092af1f2012-03-04 07:14:03 +02001431 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001432
1433 Py_DECREF(attrib);
1434
1435 return elem;
1436}
1437
1438static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001439element_remove(ElementObject* self, PyObject* args)
1440{
1441 int i;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001442 int rc;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001443 PyObject* element;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001444 PyObject* found;
1445
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001446 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1447 return NULL;
1448
1449 if (!self->extra) {
1450 /* element has no children, so raise exception */
1451 PyErr_SetString(
1452 PyExc_ValueError,
1453 "list.remove(x): x not in list"
1454 );
1455 return NULL;
1456 }
1457
1458 for (i = 0; i < self->extra->length; i++) {
1459 if (self->extra->children[i] == element)
1460 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001461 rc = PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ);
1462 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001463 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001464 if (rc < 0)
1465 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001466 }
1467
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001468 if (i >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001469 /* element is not in children, so raise exception */
1470 PyErr_SetString(
1471 PyExc_ValueError,
1472 "list.remove(x): x not in list"
1473 );
1474 return NULL;
1475 }
1476
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001477 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001478
1479 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001480 for (; i < self->extra->length; i++)
1481 self->extra->children[i] = self->extra->children[i+1];
1482
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001483 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001484 Py_RETURN_NONE;
1485}
1486
1487static PyObject*
1488element_repr(ElementObject* self)
1489{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001490 if (self->tag)
1491 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1492 else
1493 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001494}
1495
1496static PyObject*
1497element_set(ElementObject* self, PyObject* args)
1498{
1499 PyObject* attrib;
1500
1501 PyObject* key;
1502 PyObject* value;
1503 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1504 return NULL;
1505
Victor Stinner5f0af232013-07-11 23:01:36 +02001506 if (!self->extra) {
1507 if (create_extra(self, NULL) < 0)
1508 return NULL;
1509 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001510
1511 attrib = element_get_attrib(self);
1512 if (!attrib)
1513 return NULL;
1514
1515 if (PyDict_SetItem(attrib, key, value) < 0)
1516 return NULL;
1517
1518 Py_RETURN_NONE;
1519}
1520
1521static int
Serhiy Storchaka097a6642015-11-25 20:12:37 +02001522element_setitem(PyObject* self_, Py_ssize_t index_, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001523{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001524 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka097a6642015-11-25 20:12:37 +02001525 int i, index;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001526 PyObject* old;
1527
Serhiy Storchaka097a6642015-11-25 20:12:37 +02001528 if (!self->extra || index_ < 0 || index_ >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001529 PyErr_SetString(
1530 PyExc_IndexError,
1531 "child assignment index out of range");
1532 return -1;
1533 }
Serhiy Storchaka097a6642015-11-25 20:12:37 +02001534 index = (int)index_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001535
1536 old = self->extra->children[index];
1537
1538 if (item) {
1539 Py_INCREF(item);
1540 self->extra->children[index] = item;
1541 } else {
1542 self->extra->length--;
1543 for (i = index; i < self->extra->length; i++)
1544 self->extra->children[i] = self->extra->children[i+1];
1545 }
1546
1547 Py_DECREF(old);
1548
1549 return 0;
1550}
1551
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001552static PyObject*
1553element_subscr(PyObject* self_, PyObject* item)
1554{
1555 ElementObject* self = (ElementObject*) self_;
1556
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001557 if (PyIndex_Check(item)) {
1558 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001559
1560 if (i == -1 && PyErr_Occurred()) {
1561 return NULL;
1562 }
1563 if (i < 0 && self->extra)
1564 i += self->extra->length;
1565 return element_getitem(self_, i);
1566 }
1567 else if (PySlice_Check(item)) {
1568 Py_ssize_t start, stop, step, slicelen, cur, i;
1569 PyObject* list;
1570
1571 if (!self->extra)
1572 return PyList_New(0);
1573
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001574 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001575 self->extra->length,
1576 &start, &stop, &step, &slicelen) < 0) {
1577 return NULL;
1578 }
1579
1580 if (slicelen <= 0)
1581 return PyList_New(0);
1582 else {
1583 list = PyList_New(slicelen);
1584 if (!list)
1585 return NULL;
1586
1587 for (cur = start, i = 0; i < slicelen;
1588 cur += step, i++) {
1589 PyObject* item = self->extra->children[cur];
1590 Py_INCREF(item);
1591 PyList_SET_ITEM(list, i, item);
1592 }
1593
1594 return list;
1595 }
1596 }
1597 else {
1598 PyErr_SetString(PyExc_TypeError,
1599 "element indices must be integers");
1600 return NULL;
1601 }
1602}
1603
1604static int
1605element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1606{
1607 ElementObject* self = (ElementObject*) self_;
1608
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001609 if (PyIndex_Check(item)) {
1610 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001611
1612 if (i == -1 && PyErr_Occurred()) {
1613 return -1;
1614 }
1615 if (i < 0 && self->extra)
1616 i += self->extra->length;
1617 return element_setitem(self_, i, value);
1618 }
1619 else if (PySlice_Check(item)) {
1620 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1621
1622 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001623 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001624
Victor Stinner5f0af232013-07-11 23:01:36 +02001625 if (!self->extra) {
1626 if (create_extra(self, NULL) < 0)
1627 return -1;
1628 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001629
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001630 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001631 self->extra->length,
1632 &start, &stop, &step, &slicelen) < 0) {
1633 return -1;
1634 }
Serhiy Storchaka097a6642015-11-25 20:12:37 +02001635 assert(slicelen <= self->extra->length);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001636
Eli Bendersky865756a2012-03-09 13:38:15 +02001637 if (value == NULL) {
1638 /* Delete slice */
1639 size_t cur;
1640 Py_ssize_t i;
1641
1642 if (slicelen <= 0)
1643 return 0;
1644
1645 /* Since we're deleting, the direction of the range doesn't matter,
1646 * so for simplicity make it always ascending.
1647 */
1648 if (step < 0) {
1649 stop = start + 1;
1650 start = stop + step * (slicelen - 1) - 1;
1651 step = -step;
1652 }
1653
1654 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1655
1656 /* recycle is a list that will contain all the children
1657 * scheduled for removal.
1658 */
1659 if (!(recycle = PyList_New(slicelen))) {
1660 PyErr_NoMemory();
1661 return -1;
1662 }
1663
1664 /* This loop walks over all the children that have to be deleted,
1665 * with cur pointing at them. num_moved is the amount of children
1666 * until the next deleted child that have to be "shifted down" to
1667 * occupy the deleted's places.
1668 * Note that in the ith iteration, shifting is done i+i places down
1669 * because i children were already removed.
1670 */
1671 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1672 /* Compute how many children have to be moved, clipping at the
1673 * list end.
1674 */
1675 Py_ssize_t num_moved = step - 1;
1676 if (cur + step >= (size_t)self->extra->length) {
1677 num_moved = self->extra->length - cur - 1;
1678 }
1679
1680 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1681
1682 memmove(
1683 self->extra->children + cur - i,
1684 self->extra->children + cur + 1,
1685 num_moved * sizeof(PyObject *));
1686 }
1687
1688 /* Leftover "tail" after the last removed child */
1689 cur = start + (size_t)slicelen * step;
1690 if (cur < (size_t)self->extra->length) {
1691 memmove(
1692 self->extra->children + cur - slicelen,
1693 self->extra->children + cur,
1694 (self->extra->length - cur) * sizeof(PyObject *));
1695 }
1696
Serhiy Storchaka097a6642015-11-25 20:12:37 +02001697 self->extra->length -= (int)slicelen;
Eli Bendersky865756a2012-03-09 13:38:15 +02001698
1699 /* Discard the recycle list with all the deleted sub-elements */
1700 Py_XDECREF(recycle);
1701 return 0;
1702 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001703
1704 /* A new slice is actually being assigned */
1705 seq = PySequence_Fast(value, "");
1706 if (!seq) {
1707 PyErr_Format(
1708 PyExc_TypeError,
1709 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1710 );
1711 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001712 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001713 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001714
1715 if (step != 1 && newlen != slicelen)
1716 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001717 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001718 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001719 "attempt to assign sequence of size %zd "
1720 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001721 newlen, slicelen
1722 );
1723 return -1;
1724 }
1725
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001726 /* Resize before creating the recycle bin, to prevent refleaks. */
1727 if (newlen > slicelen) {
1728 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001729 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001730 return -1;
1731 }
1732 }
Serhiy Storchaka097a6642015-11-25 20:12:37 +02001733 assert(newlen - slicelen <= INT_MAX - self->extra->length);
1734 assert(newlen - slicelen >= -self->extra->length);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001735
1736 if (slicelen > 0) {
1737 /* to avoid recursive calls to this method (via decref), move
1738 old items to the recycle bin here, and get rid of them when
1739 we're done modifying the element */
1740 recycle = PyList_New(slicelen);
1741 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001742 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001743 return -1;
1744 }
1745 for (cur = start, i = 0; i < slicelen;
1746 cur += step, i++)
1747 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1748 }
1749
1750 if (newlen < slicelen) {
1751 /* delete slice */
1752 for (i = stop; i < self->extra->length; i++)
1753 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1754 } else if (newlen > slicelen) {
1755 /* insert slice */
1756 for (i = self->extra->length-1; i >= stop; i--)
1757 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1758 }
1759
1760 /* replace the slice */
1761 for (cur = start, i = 0; i < newlen;
1762 cur += step, i++) {
1763 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1764 Py_INCREF(element);
1765 self->extra->children[cur] = element;
1766 }
1767
Serhiy Storchaka097a6642015-11-25 20:12:37 +02001768 self->extra->length += (int)(newlen - slicelen);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001769
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001770 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001771
1772 /* discard the recycle bin, and everything in it */
1773 Py_XDECREF(recycle);
1774
1775 return 0;
1776 }
1777 else {
1778 PyErr_SetString(PyExc_TypeError,
1779 "element indices must be integers");
1780 return -1;
1781 }
1782}
1783
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001784static PyMethodDef element_methods[] = {
1785
Eli Bendersky0192ba32012-03-30 16:38:33 +03001786 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001787
Eli Benderskya8736902013-01-05 06:26:39 -08001788 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001789 {"set", (PyCFunction) element_set, METH_VARARGS},
1790
Eli Bendersky737b1732012-05-29 06:02:56 +03001791 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1792 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1793 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001794
1795 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001796 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001797 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1798 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1799
Eli Benderskya8736902013-01-05 06:26:39 -08001800 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001801 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001802 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001803
Eli Benderskya8736902013-01-05 06:26:39 -08001804 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001805 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1806
1807 {"items", (PyCFunction) element_items, METH_VARARGS},
1808 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1809
1810 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1811
1812 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1813 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001814 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001815 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1816 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001817
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001818 {NULL, NULL}
1819};
1820
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001821static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001822element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001823{
1824 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001825 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001826
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001827 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001828 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001829
Alexander Belopolskye239d232010-12-08 23:31:48 +00001830 if (name == NULL)
1831 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001832
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001833 /* handle common attributes first */
1834 if (strcmp(name, "tag") == 0) {
1835 res = self->tag;
1836 Py_INCREF(res);
1837 return res;
1838 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001839 res = element_get_text(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02001840 Py_XINCREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001841 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001842 }
1843
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001844 /* methods */
1845 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1846 if (res)
1847 return res;
1848
1849 /* less common attributes */
1850 if (strcmp(name, "tail") == 0) {
1851 PyErr_Clear();
1852 res = element_get_tail(self);
1853 } else if (strcmp(name, "attrib") == 0) {
1854 PyErr_Clear();
Victor Stinner5f0af232013-07-11 23:01:36 +02001855 if (!self->extra) {
1856 if (create_extra(self, NULL) < 0)
1857 return NULL;
1858 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001859 res = element_get_attrib(self);
1860 }
1861
1862 if (!res)
1863 return NULL;
1864
1865 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001866 return res;
1867}
1868
Eli Benderskyef9683b2013-05-18 07:52:34 -07001869static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001870element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001871{
Eli Benderskyb20df952012-05-20 06:33:29 +03001872 char *name = "";
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001873
1874 if (value == NULL) {
1875 PyErr_SetString(PyExc_AttributeError,
1876 "can't delete attribute");
1877 return -1;
1878 }
Eli Benderskyb20df952012-05-20 06:33:29 +03001879 if (PyUnicode_Check(nameobj))
1880 name = _PyUnicode_AsString(nameobj);
Victor Stinner4d463432013-07-11 23:05:03 +02001881 if (name == NULL)
Eli Benderskyef9683b2013-05-18 07:52:34 -07001882 return -1;
Victor Stinner4d463432013-07-11 23:05:03 +02001883
1884 if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001885 Py_DECREF(self->tag);
1886 self->tag = value;
1887 Py_INCREF(self->tag);
1888 } else if (strcmp(name, "text") == 0) {
1889 Py_DECREF(JOIN_OBJ(self->text));
1890 self->text = value;
1891 Py_INCREF(self->text);
1892 } else if (strcmp(name, "tail") == 0) {
1893 Py_DECREF(JOIN_OBJ(self->tail));
1894 self->tail = value;
1895 Py_INCREF(self->tail);
1896 } else if (strcmp(name, "attrib") == 0) {
Victor Stinner5f0af232013-07-11 23:01:36 +02001897 if (!self->extra) {
1898 if (create_extra(self, NULL) < 0)
1899 return -1;
1900 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001901 Py_DECREF(self->extra->attrib);
1902 self->extra->attrib = value;
1903 Py_INCREF(self->extra->attrib);
1904 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001905 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001906 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001907 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001908 }
1909
Eli Benderskyef9683b2013-05-18 07:52:34 -07001910 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001911}
1912
1913static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001914 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001915 0, /* sq_concat */
1916 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001917 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001918 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001919 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001920 0,
1921};
1922
1923static PyMappingMethods element_as_mapping = {
1924 (lenfunc) element_length,
1925 (binaryfunc) element_subscr,
1926 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001927};
1928
Neal Norwitz227b5332006-03-22 09:28:35 +00001929static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001930 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001931 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001932 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001933 (destructor)element_dealloc, /* tp_dealloc */
1934 0, /* tp_print */
1935 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001936 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001937 0, /* tp_reserved */
1938 (reprfunc)element_repr, /* tp_repr */
1939 0, /* tp_as_number */
1940 &element_as_sequence, /* tp_as_sequence */
1941 &element_as_mapping, /* tp_as_mapping */
1942 0, /* tp_hash */
1943 0, /* tp_call */
1944 0, /* tp_str */
1945 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001946 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001947 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001948 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1949 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001950 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001951 (traverseproc)element_gc_traverse, /* tp_traverse */
1952 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001953 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001954 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001955 0, /* tp_iter */
1956 0, /* tp_iternext */
1957 element_methods, /* tp_methods */
1958 0, /* tp_members */
1959 0, /* tp_getset */
1960 0, /* tp_base */
1961 0, /* tp_dict */
1962 0, /* tp_descr_get */
1963 0, /* tp_descr_set */
1964 0, /* tp_dictoffset */
1965 (initproc)element_init, /* tp_init */
1966 PyType_GenericAlloc, /* tp_alloc */
1967 element_new, /* tp_new */
1968 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001969};
1970
Eli Bendersky64d11e62012-06-15 07:42:50 +03001971/******************************* Element iterator ****************************/
1972
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a singly-linked list starting at parent_stack.
 * Each stack node contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
1981typedef struct ParentLocator_t {
1982 ElementObject *parent;
1983 Py_ssize_t child_index;
1984 struct ParentLocator_t *next;
1985} ParentLocator;
1986
1987typedef struct {
1988 PyObject_HEAD
1989 ParentLocator *parent_stack;
1990 ElementObject *root_element;
1991 PyObject *sought_tag;
1992 int root_done;
1993 int gettext;
1994} ElementIterObject;
1995
1996
1997static void
1998elementiter_dealloc(ElementIterObject *it)
1999{
2000 ParentLocator *p = it->parent_stack;
2001 while (p) {
2002 ParentLocator *temp = p;
2003 Py_XDECREF(p->parent);
2004 p = p->next;
2005 PyObject_Free(temp);
2006 }
2007
2008 Py_XDECREF(it->sought_tag);
2009 Py_XDECREF(it->root_element);
2010
2011 PyObject_GC_UnTrack(it);
2012 PyObject_GC_Del(it);
2013}
2014
2015static int
2016elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2017{
2018 ParentLocator *p = it->parent_stack;
2019 while (p) {
2020 Py_VISIT(p->parent);
2021 p = p->next;
2022 }
2023
2024 Py_VISIT(it->root_element);
2025 Py_VISIT(it->sought_tag);
2026 return 0;
2027}
2028
2029/* Helper function for elementiter_next. Add a new parent to the parent stack.
2030 */
2031static ParentLocator *
2032parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
2033{
2034 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
2035 if (new_node) {
2036 new_node->parent = parent;
2037 Py_INCREF(parent);
2038 new_node->child_index = 0;
2039 new_node->next = stack;
2040 }
2041 return new_node;
2042}
2043
2044static PyObject *
2045elementiter_next(ElementIterObject *it)
2046{
2047 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002048 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002049 * A short note on gettext: this function serves both the iter() and
2050 * itertext() methods to avoid code duplication. However, there are a few
2051 * small differences in the way these iterations work. Namely:
2052 * - itertext() only yields text from nodes that have it, and continues
2053 * iterating when a node doesn't have text (so it doesn't return any
2054 * node like iter())
2055 * - itertext() also has to handle tail, after finishing with all the
2056 * children of a node.
2057 */
Eli Bendersky113da642012-06-15 07:52:49 +03002058 ElementObject *cur_parent;
2059 Py_ssize_t child_index;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002060 int rc;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002061
2062 while (1) {
2063 /* Handle the case reached in the beginning and end of iteration, where
2064 * the parent stack is empty. The root_done flag gives us indication
2065 * whether we've just started iterating (so root_done is 0), in which
2066 * case the root is returned. If root_done is 1 and we're here, the
2067 * iterator is exhausted.
2068 */
2069 if (!it->parent_stack->parent) {
2070 if (it->root_done) {
2071 PyErr_SetNone(PyExc_StopIteration);
2072 return NULL;
2073 } else {
2074 it->parent_stack = parent_stack_push_new(it->parent_stack,
2075 it->root_element);
2076 if (!it->parent_stack) {
2077 PyErr_NoMemory();
2078 return NULL;
2079 }
2080
2081 it->root_done = 1;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002082 rc = (it->sought_tag == Py_None);
2083 if (!rc) {
2084 rc = PyObject_RichCompareBool(it->root_element->tag,
2085 it->sought_tag, Py_EQ);
2086 if (rc < 0)
2087 return NULL;
2088 }
2089 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002090 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002091 PyObject *text = element_get_text(it->root_element);
2092 if (!text)
2093 return NULL;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002094 rc = PyObject_IsTrue(text);
2095 if (rc < 0)
2096 return NULL;
2097 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002098 Py_INCREF(text);
2099 return text;
2100 }
2101 } else {
2102 Py_INCREF(it->root_element);
2103 return (PyObject *)it->root_element;
2104 }
2105 }
2106 }
2107 }
2108
2109 /* See if there are children left to traverse in the current parent. If
2110 * yes, visit the next child. If not, pop the stack and try again.
2111 */
Eli Bendersky113da642012-06-15 07:52:49 +03002112 cur_parent = it->parent_stack->parent;
2113 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002114 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2115 ElementObject *child = (ElementObject *)
2116 cur_parent->extra->children[child_index];
2117 it->parent_stack->child_index++;
2118 it->parent_stack = parent_stack_push_new(it->parent_stack,
2119 child);
2120 if (!it->parent_stack) {
2121 PyErr_NoMemory();
2122 return NULL;
2123 }
2124
2125 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002126 PyObject *text = element_get_text(child);
2127 if (!text)
2128 return NULL;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002129 rc = PyObject_IsTrue(text);
2130 if (rc < 0)
2131 return NULL;
2132 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002133 Py_INCREF(text);
2134 return text;
2135 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002136 } else {
2137 rc = (it->sought_tag == Py_None);
2138 if (!rc) {
2139 rc = PyObject_RichCompareBool(child->tag,
2140 it->sought_tag, Py_EQ);
2141 if (rc < 0)
2142 return NULL;
2143 }
2144 if (rc) {
2145 Py_INCREF(child);
2146 return (PyObject *)child;
2147 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002148 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002149 }
2150 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002151 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002152 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002153 if (it->gettext) {
2154 tail = element_get_tail(cur_parent);
2155 if (!tail)
2156 return NULL;
2157 }
2158 else
2159 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002160 Py_XDECREF(it->parent_stack->parent);
2161 PyObject_Free(it->parent_stack);
2162 it->parent_stack = next;
2163
2164 /* Note that extra condition on it->parent_stack->parent here;
2165 * this is because itertext() is supposed to only return *inner*
2166 * text, not text following the element it began iteration with.
2167 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002168 if (it->parent_stack->parent) {
2169 rc = PyObject_IsTrue(tail);
2170 if (rc < 0)
2171 return NULL;
2172 if (rc) {
2173 Py_INCREF(tail);
2174 return tail;
2175 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002176 }
2177 }
2178 }
2179
2180 return NULL;
2181}
2182
2183
2184static PyTypeObject ElementIter_Type = {
2185 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002186 /* Using the module's name since the pure-Python implementation does not
2187 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002188 "_elementtree._element_iterator", /* tp_name */
2189 sizeof(ElementIterObject), /* tp_basicsize */
2190 0, /* tp_itemsize */
2191 /* methods */
2192 (destructor)elementiter_dealloc, /* tp_dealloc */
2193 0, /* tp_print */
2194 0, /* tp_getattr */
2195 0, /* tp_setattr */
2196 0, /* tp_reserved */
2197 0, /* tp_repr */
2198 0, /* tp_as_number */
2199 0, /* tp_as_sequence */
2200 0, /* tp_as_mapping */
2201 0, /* tp_hash */
2202 0, /* tp_call */
2203 0, /* tp_str */
2204 0, /* tp_getattro */
2205 0, /* tp_setattro */
2206 0, /* tp_as_buffer */
2207 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2208 0, /* tp_doc */
2209 (traverseproc)elementiter_traverse, /* tp_traverse */
2210 0, /* tp_clear */
2211 0, /* tp_richcompare */
2212 0, /* tp_weaklistoffset */
2213 PyObject_SelfIter, /* tp_iter */
2214 (iternextfunc)elementiter_next, /* tp_iternext */
2215 0, /* tp_methods */
2216 0, /* tp_members */
2217 0, /* tp_getset */
2218 0, /* tp_base */
2219 0, /* tp_dict */
2220 0, /* tp_descr_get */
2221 0, /* tp_descr_set */
2222 0, /* tp_dictoffset */
2223 0, /* tp_init */
2224 0, /* tp_alloc */
2225 0, /* tp_new */
2226};
2227
2228
2229static PyObject *
2230create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2231{
2232 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002233
2234 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2235 if (!it)
2236 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002237
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002238 if (PyUnicode_Check(tag)) {
2239 if (PyUnicode_READY(tag) < 0)
2240 return NULL;
2241 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
2242 tag = Py_None;
2243 }
2244 else if (PyBytes_Check(tag)) {
2245 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
2246 tag = Py_None;
2247 }
Victor Stinner4d463432013-07-11 23:05:03 +02002248
2249 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002250 it->sought_tag = tag;
2251 it->root_done = 0;
2252 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002253 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002254 it->root_element = self;
2255
Eli Bendersky64d11e62012-06-15 07:42:50 +03002256 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002257
2258 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2259 if (it->parent_stack == NULL) {
2260 Py_DECREF(it);
2261 PyErr_NoMemory();
2262 return NULL;
2263 }
2264 it->parent_stack->parent = NULL;
2265 it->parent_stack->child_index = 0;
2266 it->parent_stack->next = NULL;
2267
Eli Bendersky64d11e62012-06-15 07:42:50 +03002268 return (PyObject *)it;
2269}
2270
2271
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002272/* ==================================================================== */
2273/* the tree builder type */
2274
2275typedef struct {
2276 PyObject_HEAD
2277
Eli Bendersky58d548d2012-05-29 15:45:16 +03002278 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002279
Antoine Pitrouee329312012-10-04 19:53:29 +02002280 PyObject *this; /* current node */
2281 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002282
Eli Bendersky58d548d2012-05-29 15:45:16 +03002283 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002284
Eli Bendersky58d548d2012-05-29 15:45:16 +03002285 PyObject *stack; /* element stack */
2286 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002287
Eli Bendersky48d358b2012-05-30 17:57:50 +03002288 PyObject *element_factory;
2289
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002290 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002291 PyObject *events; /* list of events, or NULL if not collecting */
2292 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2293 PyObject *end_event_obj;
2294 PyObject *start_ns_event_obj;
2295 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002296} TreeBuilderObject;
2297
Christian Heimes90aa7642007-12-19 02:45:37 +00002298#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002299
2300/* -------------------------------------------------------------------- */
2301/* constructor and destructor */
2302
Eli Bendersky58d548d2012-05-29 15:45:16 +03002303static PyObject *
2304treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002305{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002306 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2307 if (t != NULL) {
2308 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002309
Eli Bendersky58d548d2012-05-29 15:45:16 +03002310 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002311 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002312 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002313 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002314
Eli Bendersky58d548d2012-05-29 15:45:16 +03002315 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002316 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002317 t->stack = PyList_New(20);
2318 if (!t->stack) {
2319 Py_DECREF(t->this);
2320 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002321 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002322 return NULL;
2323 }
2324 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002325
Eli Bendersky58d548d2012-05-29 15:45:16 +03002326 t->events = NULL;
2327 t->start_event_obj = t->end_event_obj = NULL;
2328 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2329 }
2330 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002331}
2332
Eli Bendersky58d548d2012-05-29 15:45:16 +03002333static int
2334treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002335{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002336 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002337 PyObject *element_factory = NULL;
2338 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002339 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002340
2341 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2342 &element_factory)) {
2343 return -1;
2344 }
2345
2346 if (element_factory) {
2347 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002348 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002349 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002350 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002351 }
2352
Eli Bendersky58d548d2012-05-29 15:45:16 +03002353 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002354}
2355
Eli Bendersky48d358b2012-05-30 17:57:50 +03002356static int
2357treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2358{
2359 Py_VISIT(self->root);
2360 Py_VISIT(self->this);
2361 Py_VISIT(self->last);
2362 Py_VISIT(self->data);
2363 Py_VISIT(self->stack);
2364 Py_VISIT(self->element_factory);
2365 return 0;
2366}
2367
2368static int
2369treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002370{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002371 Py_CLEAR(self->end_ns_event_obj);
2372 Py_CLEAR(self->start_ns_event_obj);
2373 Py_CLEAR(self->end_event_obj);
2374 Py_CLEAR(self->start_event_obj);
2375 Py_CLEAR(self->events);
2376 Py_CLEAR(self->stack);
2377 Py_CLEAR(self->data);
2378 Py_CLEAR(self->last);
2379 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002380 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002381 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002382 return 0;
2383}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002384
Eli Bendersky48d358b2012-05-30 17:57:50 +03002385static void
2386treebuilder_dealloc(TreeBuilderObject *self)
2387{
2388 PyObject_GC_UnTrack(self);
2389 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002390 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002391}
2392
2393/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002394/* helpers for handling of arbitrary element-like objects */
2395
2396static int
2397treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2398 PyObject **dest, _Py_Identifier *name)
2399{
2400 if (Element_CheckExact(element)) {
2401 Py_DECREF(JOIN_OBJ(*dest));
2402 *dest = JOIN_SET(data, PyList_CheckExact(data));
2403 return 0;
2404 }
2405 else {
2406 PyObject *joined = list_join(data);
2407 int r;
2408 if (joined == NULL)
2409 return -1;
2410 r = _PyObject_SetAttrId(element, name, joined);
2411 Py_DECREF(joined);
2412 return r;
2413 }
2414}
2415
2416/* These two functions steal a reference to data */
2417static int
2418treebuilder_set_element_text(PyObject *element, PyObject *data)
2419{
2420 _Py_IDENTIFIER(text);
2421 return treebuilder_set_element_text_or_tail(
2422 element, data, &((ElementObject *) element)->text, &PyId_text);
2423}
2424
2425static int
2426treebuilder_set_element_tail(PyObject *element, PyObject *data)
2427{
2428 _Py_IDENTIFIER(tail);
2429 return treebuilder_set_element_text_or_tail(
2430 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2431}
2432
2433static int
2434treebuilder_add_subelement(PyObject *element, PyObject *child)
2435{
2436 _Py_IDENTIFIER(append);
2437 if (Element_CheckExact(element)) {
2438 ElementObject *elem = (ElementObject *) element;
2439 return element_add_subelement(elem, child);
2440 }
2441 else {
2442 PyObject *res;
2443 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2444 if (res == NULL)
2445 return -1;
2446 Py_DECREF(res);
2447 return 0;
2448 }
2449}
2450
2451/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002452/* handlers */
2453
2454LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002455treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2456 PyObject* attrib)
2457{
2458 PyObject* node;
2459 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002460 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002461
2462 if (self->data) {
2463 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002464 if (treebuilder_set_element_text(self->last, self->data))
2465 return NULL;
2466 }
2467 else {
2468 if (treebuilder_set_element_tail(self->last, self->data))
2469 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002470 }
2471 self->data = NULL;
2472 }
2473
Eli Bendersky08231a92013-05-18 15:47:16 -07002474 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002475 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2476 } else {
2477 node = create_new_element(tag, attrib);
2478 }
2479 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002480 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002481 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002482
Antoine Pitrouee329312012-10-04 19:53:29 +02002483 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002484
2485 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002486 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002487 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002488 } else {
2489 if (self->root) {
2490 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002491 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002492 "multiple elements on top level"
2493 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002494 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002495 }
2496 Py_INCREF(node);
2497 self->root = node;
2498 }
2499
2500 if (self->index < PyList_GET_SIZE(self->stack)) {
2501 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002502 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002503 Py_INCREF(this);
2504 } else {
2505 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002506 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002507 }
2508 self->index++;
2509
2510 Py_DECREF(this);
2511 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002512 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002513
2514 Py_DECREF(self->last);
2515 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002516 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002517
2518 if (self->start_event_obj) {
2519 PyObject* res;
2520 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002521 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002522 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002523 PyList_Append(self->events, res);
2524 Py_DECREF(res);
2525 } else
2526 PyErr_Clear(); /* FIXME: propagate error */
2527 }
2528
2529 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002530
2531 error:
2532 Py_DECREF(node);
2533 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002534}
2535
2536LOCAL(PyObject*)
2537treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2538{
2539 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002540 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002541 /* ignore calls to data before the first call to start */
2542 Py_RETURN_NONE;
2543 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544 /* store the first item as is */
2545 Py_INCREF(data); self->data = data;
2546 } else {
2547 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002548 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2549 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002550 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002551 /* expat often generates single character data sections; handle
2552 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002553 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2554 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002555 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002556 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002557 } else if (PyList_CheckExact(self->data)) {
2558 if (PyList_Append(self->data, data) < 0)
2559 return NULL;
2560 } else {
2561 PyObject* list = PyList_New(2);
2562 if (!list)
2563 return NULL;
2564 PyList_SET_ITEM(list, 0, self->data);
2565 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2566 self->data = list;
2567 }
2568 }
2569
2570 Py_RETURN_NONE;
2571}
2572
2573LOCAL(PyObject*)
2574treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2575{
2576 PyObject* item;
2577
2578 if (self->data) {
2579 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002580 if (treebuilder_set_element_text(self->last, self->data))
2581 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002582 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002583 if (treebuilder_set_element_tail(self->last, self->data))
2584 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002585 }
2586 self->data = NULL;
2587 }
2588
2589 if (self->index == 0) {
2590 PyErr_SetString(
2591 PyExc_IndexError,
2592 "pop from empty stack"
2593 );
2594 return NULL;
2595 }
2596
2597 self->index--;
2598
2599 item = PyList_GET_ITEM(self->stack, self->index);
2600 Py_INCREF(item);
2601
2602 Py_DECREF(self->last);
2603
Antoine Pitrouee329312012-10-04 19:53:29 +02002604 self->last = self->this;
2605 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002606
2607 if (self->end_event_obj) {
2608 PyObject* res;
2609 PyObject* action = self->end_event_obj;
2610 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002611 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002612 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002613 PyList_Append(self->events, res);
2614 Py_DECREF(res);
2615 } else
2616 PyErr_Clear(); /* FIXME: propagate error */
2617 }
2618
2619 Py_INCREF(self->last);
2620 return (PyObject*) self->last;
2621}
2622
2623LOCAL(void)
2624treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002625 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002626{
2627 PyObject* res;
2628 PyObject* action;
2629 PyObject* parcel;
2630
2631 if (!self->events)
2632 return;
2633
2634 if (start) {
2635 if (!self->start_ns_event_obj)
2636 return;
2637 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002638 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002639 if (!parcel)
2640 return;
2641 Py_INCREF(action);
2642 } else {
2643 if (!self->end_ns_event_obj)
2644 return;
2645 action = self->end_ns_event_obj;
2646 Py_INCREF(action);
2647 parcel = Py_None;
2648 Py_INCREF(parcel);
2649 }
2650
2651 res = PyTuple_New(2);
2652
2653 if (res) {
2654 PyTuple_SET_ITEM(res, 0, action);
2655 PyTuple_SET_ITEM(res, 1, parcel);
2656 PyList_Append(self->events, res);
2657 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002658 }
2659 else {
2660 Py_DECREF(action);
2661 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002662 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002663 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002664}
2665
2666/* -------------------------------------------------------------------- */
2667/* methods (in alphabetical order) */
2668
2669static PyObject*
2670treebuilder_data(TreeBuilderObject* self, PyObject* args)
2671{
2672 PyObject* data;
2673 if (!PyArg_ParseTuple(args, "O:data", &data))
2674 return NULL;
2675
2676 return treebuilder_handle_data(self, data);
2677}
2678
2679static PyObject*
2680treebuilder_end(TreeBuilderObject* self, PyObject* args)
2681{
2682 PyObject* tag;
2683 if (!PyArg_ParseTuple(args, "O:end", &tag))
2684 return NULL;
2685
2686 return treebuilder_handle_end(self, tag);
2687}
2688
2689LOCAL(PyObject*)
2690treebuilder_done(TreeBuilderObject* self)
2691{
2692 PyObject* res;
2693
2694 /* FIXME: check stack size? */
2695
2696 if (self->root)
2697 res = self->root;
2698 else
2699 res = Py_None;
2700
2701 Py_INCREF(res);
2702 return res;
2703}
2704
2705static PyObject*
2706treebuilder_close(TreeBuilderObject* self, PyObject* args)
2707{
2708 if (!PyArg_ParseTuple(args, ":close"))
2709 return NULL;
2710
2711 return treebuilder_done(self);
2712}
2713
2714static PyObject*
2715treebuilder_start(TreeBuilderObject* self, PyObject* args)
2716{
2717 PyObject* tag;
2718 PyObject* attrib = Py_None;
2719 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2720 return NULL;
2721
2722 return treebuilder_handle_start(self, tag, attrib);
2723}
2724
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002725static PyMethodDef treebuilder_methods[] = {
2726 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2727 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2728 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002729 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2730 {NULL, NULL}
2731};
2732
Neal Norwitz227b5332006-03-22 09:28:35 +00002733static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002734 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002735 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002737 (destructor)treebuilder_dealloc, /* tp_dealloc */
2738 0, /* tp_print */
2739 0, /* tp_getattr */
2740 0, /* tp_setattr */
2741 0, /* tp_reserved */
2742 0, /* tp_repr */
2743 0, /* tp_as_number */
2744 0, /* tp_as_sequence */
2745 0, /* tp_as_mapping */
2746 0, /* tp_hash */
2747 0, /* tp_call */
2748 0, /* tp_str */
2749 0, /* tp_getattro */
2750 0, /* tp_setattro */
2751 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002752 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2753 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002754 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002755 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2756 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002757 0, /* tp_richcompare */
2758 0, /* tp_weaklistoffset */
2759 0, /* tp_iter */
2760 0, /* tp_iternext */
2761 treebuilder_methods, /* tp_methods */
2762 0, /* tp_members */
2763 0, /* tp_getset */
2764 0, /* tp_base */
2765 0, /* tp_dict */
2766 0, /* tp_descr_get */
2767 0, /* tp_descr_set */
2768 0, /* tp_dictoffset */
2769 (initproc)treebuilder_init, /* tp_init */
2770 PyType_GenericAlloc, /* tp_alloc */
2771 treebuilder_new, /* tp_new */
2772 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002773};
2774
2775/* ==================================================================== */
2776/* the expat interface */
2777
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002778#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002779#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002780
2781/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2782 * cached globally without being in per-module state.
2783 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002784static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002785#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002786
Eli Bendersky52467b12012-06-01 07:13:08 +03002787static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2788 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2789
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002790typedef struct {
2791 PyObject_HEAD
2792
2793 XML_Parser parser;
2794
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002795 PyObject *target;
2796 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002797
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002798 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002799
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002800 PyObject *handle_start;
2801 PyObject *handle_data;
2802 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002803
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002804 PyObject *handle_comment;
2805 PyObject *handle_pi;
2806 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002807
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002808 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002809
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002810} XMLParserObject;
2811
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03002812static PyObject* xmlparser_doctype(XMLParserObject* self, PyObject* args);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002813
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002814/* helpers */
2815
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002816LOCAL(PyObject*)
2817makeuniversal(XMLParserObject* self, const char* string)
2818{
2819 /* convert a UTF-8 tag/attribute name from the expat parser
2820 to a universal name string */
2821
Antoine Pitrouc1948842012-10-01 23:40:37 +02002822 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002823 PyObject* key;
2824 PyObject* value;
2825
2826 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002827 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002828 if (!key)
2829 return NULL;
2830
2831 value = PyDict_GetItem(self->names, key);
2832
2833 if (value) {
2834 Py_INCREF(value);
2835 } else {
2836 /* new name. convert to universal name, and decode as
2837 necessary */
2838
2839 PyObject* tag;
2840 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002841 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002842
2843 /* look for namespace separator */
2844 for (i = 0; i < size; i++)
2845 if (string[i] == '}')
2846 break;
2847 if (i != size) {
2848 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002849 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002850 if (tag == NULL) {
2851 Py_DECREF(key);
2852 return NULL;
2853 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002854 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002855 p[0] = '{';
2856 memcpy(p+1, string, size);
2857 size++;
2858 } else {
2859 /* plain name; use key as tag */
2860 Py_INCREF(key);
2861 tag = key;
2862 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002863
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002864 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002865 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002866 value = PyUnicode_DecodeUTF8(p, size, "strict");
2867 Py_DECREF(tag);
2868 if (!value) {
2869 Py_DECREF(key);
2870 return NULL;
2871 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002872
2873 /* add to names dictionary */
2874 if (PyDict_SetItem(self->names, key, value) < 0) {
2875 Py_DECREF(key);
2876 Py_DECREF(value);
2877 return NULL;
2878 }
2879 }
2880
2881 Py_DECREF(key);
2882 return value;
2883}
2884
Eli Bendersky5b77d812012-03-16 08:20:05 +02002885/* Set the ParseError exception with the given parameters.
2886 * If message is not NULL, it's used as the error string. Otherwise, the
2887 * message string is the default for the given error_code.
2888*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002889static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002890expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002891{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002892 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002893 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002894
Victor Stinner499dfcf2011-03-21 13:26:24 +01002895 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002896 message ? message : EXPAT(ErrorString)(error_code),
2897 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002898 if (errmsg == NULL)
2899 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002900
Eli Bendersky532d03e2013-08-10 08:00:39 -07002901 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002902 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002903 if (!error)
2904 return;
2905
Eli Bendersky5b77d812012-03-16 08:20:05 +02002906 /* Add code and position attributes */
2907 code = PyLong_FromLong((long)error_code);
2908 if (!code) {
2909 Py_DECREF(error);
2910 return;
2911 }
2912 if (PyObject_SetAttrString(error, "code", code) == -1) {
2913 Py_DECREF(error);
2914 Py_DECREF(code);
2915 return;
2916 }
2917 Py_DECREF(code);
2918
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002919 position = Py_BuildValue("(ii)", line, column);
2920 if (!position) {
2921 Py_DECREF(error);
2922 return;
2923 }
2924 if (PyObject_SetAttrString(error, "position", position) == -1) {
2925 Py_DECREF(error);
2926 Py_DECREF(position);
2927 return;
2928 }
2929 Py_DECREF(position);
2930
Eli Bendersky532d03e2013-08-10 08:00:39 -07002931 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002932 Py_DECREF(error);
2933}
2934
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002935/* -------------------------------------------------------------------- */
2936/* handlers */
2937
2938static void
2939expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2940 int data_len)
2941{
2942 PyObject* key;
2943 PyObject* value;
2944 PyObject* res;
2945
2946 if (data_len < 2 || data_in[0] != '&')
2947 return;
2948
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002949 if (PyErr_Occurred())
2950 return;
2951
Neal Norwitz0269b912007-08-08 06:56:02 +00002952 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002953 if (!key)
2954 return;
2955
2956 value = PyDict_GetItem(self->entity, key);
2957
2958 if (value) {
2959 if (TreeBuilder_CheckExact(self->target))
2960 res = treebuilder_handle_data(
2961 (TreeBuilderObject*) self->target, value
2962 );
2963 else if (self->handle_data)
2964 res = PyObject_CallFunction(self->handle_data, "O", value);
2965 else
2966 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002967 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002968 } else if (!PyErr_Occurred()) {
2969 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002970 char message[128] = "undefined entity ";
2971 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002972 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002973 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002975 EXPAT(GetErrorColumnNumber)(self->parser),
2976 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002977 );
2978 }
2979
2980 Py_DECREF(key);
2981}
2982
2983static void
2984expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2985 const XML_Char **attrib_in)
2986{
2987 PyObject* res;
2988 PyObject* tag;
2989 PyObject* attrib;
2990 int ok;
2991
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002992 if (PyErr_Occurred())
2993 return;
2994
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002995 /* tag name */
2996 tag = makeuniversal(self, tag_in);
2997 if (!tag)
2998 return; /* parser will look for errors */
2999
3000 /* attributes */
3001 if (attrib_in[0]) {
3002 attrib = PyDict_New();
3003 if (!attrib)
3004 return;
3005 while (attrib_in[0] && attrib_in[1]) {
3006 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003007 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008 if (!key || !value) {
3009 Py_XDECREF(value);
3010 Py_XDECREF(key);
3011 Py_DECREF(attrib);
3012 return;
3013 }
3014 ok = PyDict_SetItem(attrib, key, value);
3015 Py_DECREF(value);
3016 Py_DECREF(key);
3017 if (ok < 0) {
3018 Py_DECREF(attrib);
3019 return;
3020 }
3021 attrib_in += 2;
3022 }
3023 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003024 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03003025 attrib = PyDict_New();
3026 if (!attrib)
3027 return;
3028 }
3029
3030 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003031 /* shortcut */
3032 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3033 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003034 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003035 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003036 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003037 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003038 res = NULL;
3039
3040 Py_DECREF(tag);
3041 Py_DECREF(attrib);
3042
3043 Py_XDECREF(res);
3044}
3045
3046static void
3047expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3048 int data_len)
3049{
3050 PyObject* data;
3051 PyObject* res;
3052
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003053 if (PyErr_Occurred())
3054 return;
3055
Neal Norwitz0269b912007-08-08 06:56:02 +00003056 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003057 if (!data)
3058 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003059
3060 if (TreeBuilder_CheckExact(self->target))
3061 /* shortcut */
3062 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3063 else if (self->handle_data)
3064 res = PyObject_CallFunction(self->handle_data, "O", data);
3065 else
3066 res = NULL;
3067
3068 Py_DECREF(data);
3069
3070 Py_XDECREF(res);
3071}
3072
3073static void
3074expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3075{
3076 PyObject* tag;
3077 PyObject* res = NULL;
3078
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003079 if (PyErr_Occurred())
3080 return;
3081
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082 if (TreeBuilder_CheckExact(self->target))
3083 /* shortcut */
3084 /* the standard tree builder doesn't look at the end tag */
3085 res = treebuilder_handle_end(
3086 (TreeBuilderObject*) self->target, Py_None
3087 );
3088 else if (self->handle_end) {
3089 tag = makeuniversal(self, tag_in);
3090 if (tag) {
3091 res = PyObject_CallFunction(self->handle_end, "O", tag);
3092 Py_DECREF(tag);
3093 }
3094 }
3095
3096 Py_XDECREF(res);
3097}
3098
3099static void
3100expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3101 const XML_Char *uri)
3102{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003103 PyObject* sprefix = NULL;
3104 PyObject* suri = NULL;
3105
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003106 if (PyErr_Occurred())
3107 return;
3108
Eli Bendersky5dd40e52013-11-28 06:31:58 -08003109 if (uri)
Eli Bendersky4b795182013-11-28 06:33:21 -08003110 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
Eli Bendersky5dd40e52013-11-28 06:31:58 -08003111 else
Eli Bendersky4b795182013-11-28 06:33:21 -08003112 suri = PyUnicode_FromString("");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003113 if (!suri)
3114 return;
3115
3116 if (prefix)
3117 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3118 else
3119 sprefix = PyUnicode_FromString("");
3120 if (!sprefix) {
3121 Py_DECREF(suri);
3122 return;
3123 }
3124
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003125 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003126 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003127 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003128
3129 Py_DECREF(sprefix);
3130 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003131}
3132
3133static void
3134expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3135{
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003136 if (PyErr_Occurred())
3137 return;
3138
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003139 treebuilder_handle_namespace(
3140 (TreeBuilderObject*) self->target, 0, NULL, NULL
3141 );
3142}
3143
3144static void
3145expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3146{
3147 PyObject* comment;
3148 PyObject* res;
3149
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003150 if (PyErr_Occurred())
3151 return;
3152
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003153 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003154 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003155 if (comment) {
3156 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3157 Py_XDECREF(res);
3158 Py_DECREF(comment);
3159 }
3160 }
3161}
3162
Eli Bendersky45839902013-01-13 05:14:47 -08003163static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003164expat_start_doctype_handler(XMLParserObject *self,
3165 const XML_Char *doctype_name,
3166 const XML_Char *sysid,
3167 const XML_Char *pubid,
3168 int has_internal_subset)
3169{
3170 PyObject *self_pyobj = (PyObject *)self;
3171 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3172 PyObject *parser_doctype = NULL;
3173 PyObject *res = NULL;
3174
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003175 if (PyErr_Occurred())
3176 return;
3177
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003178 doctype_name_obj = makeuniversal(self, doctype_name);
3179 if (!doctype_name_obj)
3180 return;
3181
3182 if (sysid) {
3183 sysid_obj = makeuniversal(self, sysid);
3184 if (!sysid_obj) {
3185 Py_DECREF(doctype_name_obj);
3186 return;
3187 }
3188 } else {
3189 Py_INCREF(Py_None);
3190 sysid_obj = Py_None;
3191 }
3192
3193 if (pubid) {
3194 pubid_obj = makeuniversal(self, pubid);
3195 if (!pubid_obj) {
3196 Py_DECREF(doctype_name_obj);
3197 Py_DECREF(sysid_obj);
3198 return;
3199 }
3200 } else {
3201 Py_INCREF(Py_None);
3202 pubid_obj = Py_None;
3203 }
3204
3205 /* If the target has a handler for doctype, call it. */
3206 if (self->handle_doctype) {
3207 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3208 doctype_name_obj, pubid_obj, sysid_obj);
3209 Py_CLEAR(res);
3210 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003211 else {
3212 /* Now see if the parser itself has a doctype method. If yes and it's
3213 * a custom method, call it but warn about deprecation. If it's only
3214 * the vanilla XMLParser method, do nothing.
3215 */
3216 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3217 if (parser_doctype &&
3218 !(PyCFunction_Check(parser_doctype) &&
3219 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3220 PyCFunction_GET_FUNCTION(parser_doctype) ==
3221 (PyCFunction) xmlparser_doctype)) {
3222 res = xmlparser_doctype(self, NULL);
3223 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003224 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003225 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003226 res = PyObject_CallFunction(parser_doctype, "OOO",
3227 doctype_name_obj, pubid_obj, sysid_obj);
3228 Py_CLEAR(res);
3229 }
3230 }
3231
3232clear:
3233 Py_XDECREF(parser_doctype);
3234 Py_DECREF(doctype_name_obj);
3235 Py_DECREF(pubid_obj);
3236 Py_DECREF(sysid_obj);
3237}
3238
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003239static void
3240expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3241 const XML_Char* data_in)
3242{
3243 PyObject* target;
3244 PyObject* data;
3245 PyObject* res;
3246
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003247 if (PyErr_Occurred())
3248 return;
3249
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003250 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003251 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3252 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003253 if (target && data) {
3254 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3255 Py_XDECREF(res);
3256 Py_DECREF(data);
3257 Py_DECREF(target);
3258 } else {
3259 Py_XDECREF(data);
3260 Py_XDECREF(target);
3261 }
3262 }
3263}
3264
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003265/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003266
Eli Bendersky52467b12012-06-01 07:13:08 +03003267static PyObject *
3268xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003269{
Eli Bendersky52467b12012-06-01 07:13:08 +03003270 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3271 if (self) {
3272 self->parser = NULL;
3273 self->target = self->entity = self->names = NULL;
3274 self->handle_start = self->handle_data = self->handle_end = NULL;
3275 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003276 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003277 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003278 return (PyObject *)self;
3279}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003280
Eli Bendersky52467b12012-06-01 07:13:08 +03003281static int
3282xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3283{
3284 XMLParserObject *self_xp = (XMLParserObject *)self;
3285 PyObject *target = NULL, *html = NULL;
3286 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003287 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288
Eli Bendersky52467b12012-06-01 07:13:08 +03003289 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3290 &html, &target, &encoding)) {
3291 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003292 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003293
Eli Bendersky52467b12012-06-01 07:13:08 +03003294 self_xp->entity = PyDict_New();
3295 if (!self_xp->entity)
3296 return -1;
3297
3298 self_xp->names = PyDict_New();
3299 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003300 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003301 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003302 }
3303
Eli Bendersky52467b12012-06-01 07:13:08 +03003304 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3305 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003306 Py_CLEAR(self_xp->entity);
3307 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003308 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003309 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003310 }
3311
Eli Bendersky52467b12012-06-01 07:13:08 +03003312 if (target) {
3313 Py_INCREF(target);
3314 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003315 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003316 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003317 Py_CLEAR(self_xp->entity);
3318 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003319 EXPAT(ParserFree)(self_xp->parser);
3320 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003321 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003322 }
3323 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003324
Eli Bendersky52467b12012-06-01 07:13:08 +03003325 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3326 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3327 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3328 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3329 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3330 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003331 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003332
3333 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003334
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003335 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003336 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003337 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003338 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003339 (XML_StartElementHandler) expat_start_handler,
3340 (XML_EndElementHandler) expat_end_handler
3341 );
3342 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003343 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003344 (XML_DefaultHandler) expat_default_handler
3345 );
3346 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003347 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003348 (XML_CharacterDataHandler) expat_data_handler
3349 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003350 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003351 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003352 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003353 (XML_CommentHandler) expat_comment_handler
3354 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003355 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003356 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003357 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003358 (XML_ProcessingInstructionHandler) expat_pi_handler
3359 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003360 EXPAT(SetStartDoctypeDeclHandler)(
3361 self_xp->parser,
3362 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3363 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003364 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003365 self_xp->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003366 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003367 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003368
Eli Bendersky52467b12012-06-01 07:13:08 +03003369 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003370}
3371
Eli Bendersky52467b12012-06-01 07:13:08 +03003372static int
3373xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3374{
3375 Py_VISIT(self->handle_close);
3376 Py_VISIT(self->handle_pi);
3377 Py_VISIT(self->handle_comment);
3378 Py_VISIT(self->handle_end);
3379 Py_VISIT(self->handle_data);
3380 Py_VISIT(self->handle_start);
3381
3382 Py_VISIT(self->target);
3383 Py_VISIT(self->entity);
3384 Py_VISIT(self->names);
3385
3386 return 0;
3387}
3388
3389static int
3390xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003391{
3392 EXPAT(ParserFree)(self->parser);
3393
Antoine Pitrouc1948842012-10-01 23:40:37 +02003394 Py_CLEAR(self->handle_close);
3395 Py_CLEAR(self->handle_pi);
3396 Py_CLEAR(self->handle_comment);
3397 Py_CLEAR(self->handle_end);
3398 Py_CLEAR(self->handle_data);
3399 Py_CLEAR(self->handle_start);
3400 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003401
Antoine Pitrouc1948842012-10-01 23:40:37 +02003402 Py_CLEAR(self->target);
3403 Py_CLEAR(self->entity);
3404 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003405
Eli Bendersky52467b12012-06-01 07:13:08 +03003406 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003407}
3408
Eli Bendersky52467b12012-06-01 07:13:08 +03003409static void
3410xmlparser_dealloc(XMLParserObject* self)
3411{
3412 PyObject_GC_UnTrack(self);
3413 xmlparser_gc_clear(self);
3414 Py_TYPE(self)->tp_free((PyObject *)self);
3415}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003416
3417LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003418expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003419{
3420 int ok;
3421
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003422 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003423 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3424
3425 if (PyErr_Occurred())
3426 return NULL;
3427
3428 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003429 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003430 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003431 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003432 EXPAT(GetErrorColumnNumber)(self->parser),
3433 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003434 );
3435 return NULL;
3436 }
3437
3438 Py_RETURN_NONE;
3439}
3440
3441static PyObject*
3442xmlparser_close(XMLParserObject* self, PyObject* args)
3443{
3444 /* end feeding data to parser */
3445
3446 PyObject* res;
3447 if (!PyArg_ParseTuple(args, ":close"))
3448 return NULL;
3449
3450 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003451 if (!res)
3452 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003453
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003454 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003455 Py_DECREF(res);
3456 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003457 }
3458 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003459 Py_DECREF(res);
3460 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003461 }
3462 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003463 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003464 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003465}
3466
3467static PyObject*
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003468xmlparser_feed(XMLParserObject* self, PyObject* arg)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003469{
3470 /* feed data to parser */
3471
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003472 if (PyUnicode_Check(arg)) {
3473 Py_ssize_t data_len;
3474 const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len);
3475 if (data == NULL)
3476 return NULL;
3477 if (data_len > INT_MAX) {
3478 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3479 return NULL;
3480 }
3481 /* Explicitly set UTF-8 encoding. Return code ignored. */
3482 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3483 return expat_parse(self, data, (int)data_len, 0);
3484 }
3485 else {
3486 Py_buffer view;
3487 PyObject *res;
3488 if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0)
3489 return NULL;
3490 if (view.len > INT_MAX) {
3491 PyBuffer_Release(&view);
3492 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3493 return NULL;
3494 }
3495 res = expat_parse(self, view.buf, (int)view.len, 0);
3496 PyBuffer_Release(&view);
3497 return res;
3498 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003499}
3500
3501static PyObject*
Eli Benderskya3699232013-05-19 18:47:23 -07003502xmlparser_parse_whole(XMLParserObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003503{
Eli Benderskya3699232013-05-19 18:47:23 -07003504 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003505 PyObject* reader;
3506 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003507 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003508 PyObject* res;
3509
3510 PyObject* fileobj;
3511 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3512 return NULL;
3513
3514 reader = PyObject_GetAttrString(fileobj, "read");
3515 if (!reader)
3516 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003517
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003518 /* read from open file object */
3519 for (;;) {
3520
3521 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3522
3523 if (!buffer) {
3524 /* read failed (e.g. due to KeyboardInterrupt) */
3525 Py_DECREF(reader);
3526 return NULL;
3527 }
3528
Eli Benderskyf996e772012-03-16 05:53:30 +02003529 if (PyUnicode_CheckExact(buffer)) {
3530 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003531 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003532 Py_DECREF(buffer);
3533 break;
3534 }
3535 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003536 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003537 if (!temp) {
3538 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003539 Py_DECREF(reader);
3540 return NULL;
3541 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003542 buffer = temp;
3543 }
3544 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003545 Py_DECREF(buffer);
3546 break;
3547 }
3548
Serhiy Storchaka097a6642015-11-25 20:12:37 +02003549 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3550 Py_DECREF(buffer);
3551 Py_DECREF(reader);
3552 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3553 return NULL;
3554 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003555 res = expat_parse(
Serhiy Storchaka097a6642015-11-25 20:12:37 +02003556 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003557 );
3558
3559 Py_DECREF(buffer);
3560
3561 if (!res) {
3562 Py_DECREF(reader);
3563 return NULL;
3564 }
3565 Py_DECREF(res);
3566
3567 }
3568
3569 Py_DECREF(reader);
3570
3571 res = expat_parse(self, "", 0, 1);
3572
3573 if (res && TreeBuilder_CheckExact(self->target)) {
3574 Py_DECREF(res);
3575 return treebuilder_done((TreeBuilderObject*) self->target);
3576 }
3577
3578 return res;
3579}
3580
3581static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003582xmlparser_doctype(XMLParserObject *self, PyObject *args)
3583{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003584 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3585 "This method of XMLParser is deprecated. Define"
3586 " doctype() method on the TreeBuilder target.",
3587 1) < 0) {
3588 return NULL;
3589 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003590 Py_RETURN_NONE;
3591}
3592
3593static PyObject*
3594xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003595{
3596 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003597 Py_ssize_t i, seqlen;
3598 TreeBuilderObject *target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003599
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003600 PyObject *events_queue;
3601 PyObject *events_to_report = Py_None;
3602 PyObject *events_seq;
3603 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events_queue,
3604 &events_to_report))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003605 return NULL;
3606
3607 if (!TreeBuilder_CheckExact(self->target)) {
3608 PyErr_SetString(
3609 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003610 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003611 "targets"
3612 );
3613 return NULL;
3614 }
3615
3616 target = (TreeBuilderObject*) self->target;
3617
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003618 Py_INCREF(events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003619 Py_XDECREF(target->events);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003620 target->events = events_queue;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003621
3622 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003623 Py_CLEAR(target->start_event_obj);
3624 Py_CLEAR(target->end_event_obj);
3625 Py_CLEAR(target->start_ns_event_obj);
3626 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003627
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003628 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003629 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003630 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003631 Py_RETURN_NONE;
3632 }
3633
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003634 if (!(events_seq = PySequence_Fast(events_to_report,
3635 "events must be a sequence"))) {
3636 return NULL;
3637 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003638
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003639 seqlen = PySequence_Size(events_seq);
3640 for (i = 0; i < seqlen; ++i) {
3641 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3642 char *event_name = NULL;
3643 if (PyUnicode_Check(event_name_obj)) {
3644 event_name = _PyUnicode_AsString(event_name_obj);
3645 } else if (PyBytes_Check(event_name_obj)) {
3646 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003647 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003648
3649 if (event_name == NULL) {
3650 Py_DECREF(events_seq);
3651 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3652 return NULL;
3653 } else if (strcmp(event_name, "start") == 0) {
3654 Py_INCREF(event_name_obj);
3655 target->start_event_obj = event_name_obj;
3656 } else if (strcmp(event_name, "end") == 0) {
3657 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003658 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003659 target->end_event_obj = event_name_obj;
3660 } else if (strcmp(event_name, "start-ns") == 0) {
3661 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003662 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003663 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003664 EXPAT(SetNamespaceDeclHandler)(
3665 self->parser,
3666 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3667 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3668 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003669 } else if (strcmp(event_name, "end-ns") == 0) {
3670 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003671 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003672 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003673 EXPAT(SetNamespaceDeclHandler)(
3674 self->parser,
3675 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3676 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3677 );
3678 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003679 Py_DECREF(events_seq);
3680 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681 return NULL;
3682 }
3683 }
3684
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003685 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003686 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003687}
3688
3689static PyMethodDef xmlparser_methods[] = {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003690 {"feed", (PyCFunction) xmlparser_feed, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003691 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
Eli Benderskya3699232013-05-19 18:47:23 -07003692 {"_parse_whole", (PyCFunction) xmlparser_parse_whole, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003693 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003694 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003695 {NULL, NULL}
3696};
3697
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003698static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003699xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003700{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003701 if (PyUnicode_Check(nameobj)) {
3702 PyObject* res;
3703 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3704 res = self->entity;
3705 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3706 res = self->target;
3707 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3708 return PyUnicode_FromFormat(
3709 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003710 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003711 }
3712 else
3713 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003714
Alexander Belopolskye239d232010-12-08 23:31:48 +00003715 Py_INCREF(res);
3716 return res;
3717 }
3718 generic:
3719 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003720}
3721
Neal Norwitz227b5332006-03-22 09:28:35 +00003722static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003723 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003724 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003725 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003726 (destructor)xmlparser_dealloc, /* tp_dealloc */
3727 0, /* tp_print */
3728 0, /* tp_getattr */
3729 0, /* tp_setattr */
3730 0, /* tp_reserved */
3731 0, /* tp_repr */
3732 0, /* tp_as_number */
3733 0, /* tp_as_sequence */
3734 0, /* tp_as_mapping */
3735 0, /* tp_hash */
3736 0, /* tp_call */
3737 0, /* tp_str */
3738 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3739 0, /* tp_setattro */
3740 0, /* tp_as_buffer */
3741 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3742 /* tp_flags */
3743 0, /* tp_doc */
3744 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3745 (inquiry)xmlparser_gc_clear, /* tp_clear */
3746 0, /* tp_richcompare */
3747 0, /* tp_weaklistoffset */
3748 0, /* tp_iter */
3749 0, /* tp_iternext */
3750 xmlparser_methods, /* tp_methods */
3751 0, /* tp_members */
3752 0, /* tp_getset */
3753 0, /* tp_base */
3754 0, /* tp_dict */
3755 0, /* tp_descr_get */
3756 0, /* tp_descr_set */
3757 0, /* tp_dictoffset */
3758 (initproc)xmlparser_init, /* tp_init */
3759 PyType_GenericAlloc, /* tp_alloc */
3760 xmlparser_new, /* tp_new */
3761 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003762};
3763
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003764/* ==================================================================== */
3765/* python module interface */
3766
3767static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003768 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003769 {NULL, NULL}
3770};
3771
Martin v. Löwis1a214512008-06-11 05:26:20 +00003772
Eli Bendersky532d03e2013-08-10 08:00:39 -07003773static struct PyModuleDef elementtreemodule = {
3774 PyModuleDef_HEAD_INIT,
3775 "_elementtree",
3776 NULL,
3777 sizeof(elementtreestate),
3778 _functions,
3779 NULL,
3780 elementtree_traverse,
3781 elementtree_clear,
3782 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003783};
3784
Neal Norwitzf6657e62006-12-28 04:47:50 +00003785PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003786PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003787{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003788 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003789 elementtreestate *st;
3790
3791 m = PyState_FindModule(&elementtreemodule);
3792 if (m) {
3793 Py_INCREF(m);
3794 return m;
3795 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003796
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003797 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003798 if (PyType_Ready(&ElementIter_Type) < 0)
3799 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003800 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003801 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003802 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003803 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003804 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003805 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003806
Eli Bendersky532d03e2013-08-10 08:00:39 -07003807 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003808 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003809 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003810 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003811
Eli Bendersky828efde2012-04-05 05:40:58 +03003812 if (!(temp = PyImport_ImportModule("copy")))
3813 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003814 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003815 Py_XDECREF(temp);
3816
Eli Bendersky532d03e2013-08-10 08:00:39 -07003817 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003818 return NULL;
3819
Eli Bendersky20d41742012-06-01 09:48:37 +03003820 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003821 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3822 if (expat_capi) {
3823 /* check that it's usable */
3824 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3825 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3826 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3827 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003828 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003829 PyErr_SetString(PyExc_ImportError,
3830 "pyexpat version is incompatible");
3831 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003832 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003833 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003834 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003835 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003836
Eli Bendersky532d03e2013-08-10 08:00:39 -07003837 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003838 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003839 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003840 Py_INCREF(st->parseerror_obj);
3841 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003842
Eli Bendersky092af1f2012-03-04 07:14:03 +02003843 Py_INCREF((PyObject *)&Element_Type);
3844 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3845
Eli Bendersky58d548d2012-05-29 15:45:16 +03003846 Py_INCREF((PyObject *)&TreeBuilder_Type);
3847 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3848
Eli Bendersky52467b12012-06-01 07:13:08 +03003849 Py_INCREF((PyObject *)&XMLParser_Type);
3850 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003851
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003852 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003853}