blob: 826342aa91b874bb9833a09ba97f86d726c3ce6e [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
14#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030015#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000017/* -------------------------------------------------------------------- */
18/* configuration */
19
/* Number of child slots stored inline in ElementObjectExtra.  An element
   with at most this many children needs no separately allocated
   children buffer. */
#define STATIC_CHILDREN 4
23
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
33
34/* -------------------------------------------------------------------- */
35
/* Allocation tracing hooks.  Change the condition to 1 to keep a running
   byte counter and print it on every ALLOC/RELEASE; otherwise both
   macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(nbytes, what)\
do { memory += nbytes; printf("%8d - %s\n", memory, what); } while (0)
#define RELEASE(nbytes, what)\
do { memory -= nbytes; printf("%8d - %s\n", memory, what); } while (0)
#else
#define ALLOC(nbytes, what)
#define RELEASE(nbytes, what)
#endif

/* compiler tweaks: LOCAL(T) introduces a file-private function returning
   T; under MSVC it is additionally marked __inline and __fastcall */
#if defined(_MSC_VER)
#define LOCAL(T) static __inline T __fastcall
#else
#define LOCAL(T) static T
#endif
53
/* The text and tail slots of an element are tagged pointers: bit 0 of
   the stored value is a 'join' flag, the remaining bits are the object
   pointer.  Every use of text or tail as an object pointer must go
   through JOIN_OBJ.  See the comments in the ElementObject definition
   for what the flag means. */

/* the join flag (0 or 1) carried by a tagged pointer */
#define JOIN_GET(ptr) ((Py_uintptr_t) (ptr) & 1)
/* ptr with any previous flag stripped, then tagged with 'flag' */
#define JOIN_SET(ptr, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(ptr)) | (flag)))
/* the plain object pointer, flag bit masked off */
#define JOIN_OBJ(ptr) ((PyObject*) ((Py_uintptr_t) (ptr) & ~(Py_uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061
Eli Benderskydd3661e2013-09-13 06:24:25 -070062/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
63 * reference since this function sets it to NULL.
64*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020065static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070066{
67 if (*p) {
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = NULL;
70 Py_DECREF(tmp);
71 }
72}
73
Ronald Oussoren138d0802013-07-19 11:11:25 +020074/* Types defined by this extension */
75static PyTypeObject Element_Type;
76static PyTypeObject ElementIter_Type;
77static PyTypeObject TreeBuilder_Type;
78static PyTypeObject XMLParser_Type;
79
80
Eli Bendersky532d03e2013-08-10 08:00:39 -070081/* Per-module state; PEP 3121 */
82typedef struct {
83 PyObject *parseerror_obj;
84 PyObject *deepcopy_obj;
85 PyObject *elementpath_obj;
86} elementtreestate;
87
88static struct PyModuleDef elementtreemodule;
89
90/* Given a module object (assumed to be _elementtree), get its per-module
91 * state.
92 */
93#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
94
95/* Find the module instance imported in the currently running sub-interpreter
96 * and get its state.
97 */
98#define ET_STATE_GLOBAL \
99 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
100
101static int
102elementtree_clear(PyObject *m)
103{
104 elementtreestate *st = ET_STATE(m);
105 Py_CLEAR(st->parseerror_obj);
106 Py_CLEAR(st->deepcopy_obj);
107 Py_CLEAR(st->elementpath_obj);
108 return 0;
109}
110
111static int
112elementtree_traverse(PyObject *m, visitproc visit, void *arg)
113{
114 elementtreestate *st = ET_STATE(m);
115 Py_VISIT(st->parseerror_obj);
116 Py_VISIT(st->deepcopy_obj);
117 Py_VISIT(st->elementpath_obj);
118 return 0;
119}
120
121static void
122elementtree_free(void *m)
123{
124 elementtree_clear((PyObject *)m);
125}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126
127/* helpers */
128
129LOCAL(PyObject*)
130deepcopy(PyObject* object, PyObject* memo)
131{
132 /* do a deep copy of the given object */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000133 PyObject* args;
134 PyObject* result;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700135 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136
Eli Bendersky532d03e2013-08-10 08:00:39 -0700137 if (!st->deepcopy_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138 PyErr_SetString(
139 PyExc_RuntimeError,
140 "deepcopy helper not found"
141 );
142 return NULL;
143 }
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000146 if (!args)
147 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700148 result = PyObject_CallObject(st->deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
153LOCAL(PyObject*)
154list_join(PyObject* list)
155{
156 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000158 PyObject* result;
159
Antoine Pitrouc1948842012-10-01 23:40:37 +0200160 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000161 if (!joiner)
162 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200163 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200165 if (result)
166 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000167 return result;
168}
169
Eli Bendersky48d358b2012-05-30 17:57:50 +0300170/* Is the given object an empty dictionary?
171*/
172static int
173is_empty_dict(PyObject *obj)
174{
175 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
176}
177
178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200180/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181
182typedef struct {
183
184 /* attributes (a dictionary object), or None if no attributes */
185 PyObject* attrib;
186
187 /* child elements */
188 int length; /* actual number of items */
189 int allocated; /* allocated items */
190
191 /* this either points to _children or to a malloced buffer */
192 PyObject* *children;
193
194 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100195
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000196} ElementObjectExtra;
197
198typedef struct {
199 PyObject_HEAD
200
201 /* element tag (a string). */
202 PyObject* tag;
203
204 /* text before first child. note that this is a tagged pointer;
205 use JOIN_OBJ to get the object pointer. the join flag is used
206 to distinguish lists created by the tree builder from lists
207 assigned to the attribute by application code; the former
208 should be joined before being returned to the user, the latter
209 should be left intact. */
210 PyObject* text;
211
212 /* text after this element, in parent. note that this is a tagged
213 pointer; use JOIN_OBJ to get the object pointer. */
214 PyObject* tail;
215
216 ElementObjectExtra* extra;
217
Eli Benderskyebf37a22012-04-03 22:02:37 +0300218 PyObject *weakreflist; /* For tp_weaklistoffset */
219
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000220} ElementObject;
221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222
/* true when op's type is exactly Element_Type (subclasses do not match) */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
225/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200226/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227
228LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
231 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200232 if (!self->extra) {
233 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000234 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200235 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000236
237 if (!attrib)
238 attrib = Py_None;
239
240 Py_INCREF(attrib);
241 self->extra->attrib = attrib;
242
243 self->extra->length = 0;
244 self->extra->allocated = STATIC_CHILDREN;
245 self->extra->children = self->extra->_children;
246
247 return 0;
248}
249
250LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200251dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000252{
Eli Bendersky08b85292012-04-04 15:55:07 +0300253 ElementObjectExtra *myextra;
254 int i;
255
Eli Benderskyebf37a22012-04-03 22:02:37 +0300256 if (!self->extra)
257 return;
258
259 /* Avoid DECREFs calling into this code again (cycles, etc.)
260 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300261 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300262 self->extra = NULL;
263
264 Py_DECREF(myextra->attrib);
265
Eli Benderskyebf37a22012-04-03 22:02:37 +0300266 for (i = 0; i < myextra->length; i++)
267 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268
Eli Benderskyebf37a22012-04-03 22:02:37 +0300269 if (myextra->children != myextra->_children)
270 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271
Eli Benderskyebf37a22012-04-03 22:02:37 +0300272 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273}
274
Eli Bendersky092af1f2012-03-04 07:14:03 +0200275/* Convenience internal function to create new Element objects with the given
276 * tag and attributes.
277*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000278LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200279create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280{
281 ElementObject* self;
282
Eli Bendersky0192ba32012-03-30 16:38:33 +0300283 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000284 if (self == NULL)
285 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 self->extra = NULL;
287
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 Py_INCREF(tag);
289 self->tag = tag;
290
291 Py_INCREF(Py_None);
292 self->text = Py_None;
293
294 Py_INCREF(Py_None);
295 self->tail = Py_None;
296
Eli Benderskyebf37a22012-04-03 22:02:37 +0300297 self->weakreflist = NULL;
298
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200299 ALLOC(sizeof(ElementObject), "create element");
300 PyObject_GC_Track(self);
301
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200302 if (attrib != Py_None && !is_empty_dict(attrib)) {
303 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200304 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200305 return NULL;
306 }
307 }
308
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000309 return (PyObject*) self;
310}
311
Eli Bendersky092af1f2012-03-04 07:14:03 +0200312static PyObject *
313element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
314{
315 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
316 if (e != NULL) {
317 Py_INCREF(Py_None);
318 e->tag = Py_None;
319
320 Py_INCREF(Py_None);
321 e->text = Py_None;
322
323 Py_INCREF(Py_None);
324 e->tail = Py_None;
325
326 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300327 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200328 }
329 return (PyObject *)e;
330}
331
Eli Bendersky737b1732012-05-29 06:02:56 +0300332/* Helper function for extracting the attrib dictionary from a keywords dict.
333 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800334 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300335 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700336 *
337 * Return a dictionary with the content of kwds merged into the content of
338 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300339 */
340static PyObject*
341get_attrib_from_keywords(PyObject *kwds)
342{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700343 PyObject *attrib_str = PyUnicode_FromString("attrib");
344 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300345
346 if (attrib) {
347 /* If attrib was found in kwds, copy its value and remove it from
348 * kwds
349 */
350 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700351 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300352 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
353 Py_TYPE(attrib)->tp_name);
354 return NULL;
355 }
356 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700357 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300358 } else {
359 attrib = PyDict_New();
360 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700361
362 Py_DECREF(attrib_str);
363
364 /* attrib can be NULL if PyDict_New failed */
365 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200366 if (PyDict_Update(attrib, kwds) < 0)
367 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300368 return attrib;
369}
370
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371static int
372element_init(PyObject *self, PyObject *args, PyObject *kwds)
373{
374 PyObject *tag;
375 PyObject *tmp;
376 PyObject *attrib = NULL;
377 ElementObject *self_elem;
378
379 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
380 return -1;
381
Eli Bendersky737b1732012-05-29 06:02:56 +0300382 if (attrib) {
383 /* attrib passed as positional arg */
384 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200385 if (!attrib)
386 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300387 if (kwds) {
388 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200389 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300390 return -1;
391 }
392 }
393 } else if (kwds) {
394 /* have keywords args */
395 attrib = get_attrib_from_keywords(kwds);
396 if (!attrib)
397 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200398 }
399
400 self_elem = (ElementObject *)self;
401
Antoine Pitrouc1948842012-10-01 23:40:37 +0200402 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 return -1;
406 }
407 }
408
Eli Bendersky48d358b2012-05-30 17:57:50 +0300409 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200410 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200411
412 /* Replace the objects already pointed to by tag, text and tail. */
413 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200414 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200415 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416 Py_DECREF(tmp);
417
418 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200419 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200420 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 Py_DECREF(JOIN_OBJ(tmp));
422
423 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(JOIN_OBJ(tmp));
427
428 return 0;
429}
430
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000431LOCAL(int)
432element_resize(ElementObject* self, int extra)
433{
434 int size;
435 PyObject* *children;
436
437 /* make sure self->children can hold the given number of extra
438 elements. set an exception and return -1 if allocation failed */
439
Victor Stinner5f0af232013-07-11 23:01:36 +0200440 if (!self->extra) {
441 if (create_extra(self, NULL) < 0)
442 return -1;
443 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444
445 size = self->extra->length + extra;
446
447 if (size > self->extra->allocated) {
448 /* use Python 2.4's list growth strategy */
449 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000450 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100451 * which needs at least 4 bytes.
452 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000453 * be safe.
454 */
455 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000457 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100458 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000459 * false alarm always assume at least one child to be safe.
460 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000461 children = PyObject_Realloc(self->extra->children,
462 size * sizeof(PyObject*));
463 if (!children)
464 goto nomemory;
465 } else {
466 children = PyObject_Malloc(size * sizeof(PyObject*));
467 if (!children)
468 goto nomemory;
469 /* copy existing children from static area to malloc buffer */
470 memcpy(children, self->extra->children,
471 self->extra->length * sizeof(PyObject*));
472 }
473 self->extra->children = children;
474 self->extra->allocated = size;
475 }
476
477 return 0;
478
479 nomemory:
480 PyErr_NoMemory();
481 return -1;
482}
483
484LOCAL(int)
485element_add_subelement(ElementObject* self, PyObject* element)
486{
487 /* add a child element to a parent */
488
489 if (element_resize(self, 1) < 0)
490 return -1;
491
492 Py_INCREF(element);
493 self->extra->children[self->extra->length] = element;
494
495 self->extra->length++;
496
497 return 0;
498}
499
500LOCAL(PyObject*)
501element_get_attrib(ElementObject* self)
502{
503 /* return borrowed reference to attrib dictionary */
504 /* note: this function assumes that the extra section exists */
505
506 PyObject* res = self->extra->attrib;
507
508 if (res == Py_None) {
509 /* create missing dictionary */
510 res = PyDict_New();
511 if (!res)
512 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200513 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000514 self->extra->attrib = res;
515 }
516
517 return res;
518}
519
520LOCAL(PyObject*)
521element_get_text(ElementObject* self)
522{
523 /* return borrowed reference to text attribute */
524
525 PyObject* res = self->text;
526
527 if (JOIN_GET(res)) {
528 res = JOIN_OBJ(res);
529 if (PyList_CheckExact(res)) {
530 res = list_join(res);
531 if (!res)
532 return NULL;
533 self->text = res;
534 }
535 }
536
537 return res;
538}
539
540LOCAL(PyObject*)
541element_get_tail(ElementObject* self)
542{
543 /* return borrowed reference to text attribute */
544
545 PyObject* res = self->tail;
546
547 if (JOIN_GET(res)) {
548 res = JOIN_OBJ(res);
549 if (PyList_CheckExact(res)) {
550 res = list_join(res);
551 if (!res)
552 return NULL;
553 self->tail = res;
554 }
555 }
556
557 return res;
558}
559
560static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300561subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000562{
563 PyObject* elem;
564
565 ElementObject* parent;
566 PyObject* tag;
567 PyObject* attrib = NULL;
568 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
569 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800570 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000571 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800572 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573
Eli Bendersky737b1732012-05-29 06:02:56 +0300574 if (attrib) {
575 /* attrib passed as positional arg */
576 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 if (!attrib)
578 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300579 if (kwds) {
580 if (PyDict_Update(attrib, kwds) < 0) {
581 return NULL;
582 }
583 }
584 } else if (kwds) {
585 /* have keyword args */
586 attrib = get_attrib_from_keywords(kwds);
587 if (!attrib)
588 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000589 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300590 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000591 Py_INCREF(Py_None);
592 attrib = Py_None;
593 }
594
Eli Bendersky092af1f2012-03-04 07:14:03 +0200595 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000596 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200597 if (elem == NULL)
598 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000600 if (element_add_subelement(parent, elem) < 0) {
601 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000603 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000604
605 return elem;
606}
607
Eli Bendersky0192ba32012-03-30 16:38:33 +0300608static int
609element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
610{
611 Py_VISIT(self->tag);
612 Py_VISIT(JOIN_OBJ(self->text));
613 Py_VISIT(JOIN_OBJ(self->tail));
614
615 if (self->extra) {
616 int i;
617 Py_VISIT(self->extra->attrib);
618
619 for (i = 0; i < self->extra->length; ++i)
620 Py_VISIT(self->extra->children[i]);
621 }
622 return 0;
623}
624
625static int
626element_gc_clear(ElementObject *self)
627{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300628 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700629 _clear_joined_ptr(&self->text);
630 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300631
632 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300633 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300634 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300635 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636 return 0;
637}
638
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000639static void
640element_dealloc(ElementObject* self)
641{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300642 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300643
644 if (self->weakreflist != NULL)
645 PyObject_ClearWeakRefs((PyObject *) self);
646
Eli Bendersky0192ba32012-03-30 16:38:33 +0300647 /* element_gc_clear clears all references and deallocates extra
648 */
649 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000650
651 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200652 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000653}
654
655/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000656
657static PyObject*
658element_append(ElementObject* self, PyObject* args)
659{
660 PyObject* element;
661 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
662 return NULL;
663
664 if (element_add_subelement(self, element) < 0)
665 return NULL;
666
667 Py_RETURN_NONE;
668}
669
670static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300671element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000672{
673 if (!PyArg_ParseTuple(args, ":clear"))
674 return NULL;
675
Eli Benderskyebf37a22012-04-03 22:02:37 +0300676 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000677
678 Py_INCREF(Py_None);
679 Py_DECREF(JOIN_OBJ(self->text));
680 self->text = Py_None;
681
682 Py_INCREF(Py_None);
683 Py_DECREF(JOIN_OBJ(self->tail));
684 self->tail = Py_None;
685
686 Py_RETURN_NONE;
687}
688
689static PyObject*
690element_copy(ElementObject* self, PyObject* args)
691{
692 int i;
693 ElementObject* element;
694
695 if (!PyArg_ParseTuple(args, ":__copy__"))
696 return NULL;
697
Eli Bendersky092af1f2012-03-04 07:14:03 +0200698 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800699 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000700 if (!element)
701 return NULL;
702
703 Py_DECREF(JOIN_OBJ(element->text));
704 element->text = self->text;
705 Py_INCREF(JOIN_OBJ(element->text));
706
707 Py_DECREF(JOIN_OBJ(element->tail));
708 element->tail = self->tail;
709 Py_INCREF(JOIN_OBJ(element->tail));
710
711 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000712 if (element_resize(element, self->extra->length) < 0) {
713 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000714 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000715 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716
717 for (i = 0; i < self->extra->length; i++) {
718 Py_INCREF(self->extra->children[i]);
719 element->extra->children[i] = self->extra->children[i];
720 }
721
722 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723 }
724
725 return (PyObject*) element;
726}
727
728static PyObject*
729element_deepcopy(ElementObject* self, PyObject* args)
730{
731 int i;
732 ElementObject* element;
733 PyObject* tag;
734 PyObject* attrib;
735 PyObject* text;
736 PyObject* tail;
737 PyObject* id;
738
739 PyObject* memo;
740 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
741 return NULL;
742
743 tag = deepcopy(self->tag, memo);
744 if (!tag)
745 return NULL;
746
747 if (self->extra) {
748 attrib = deepcopy(self->extra->attrib, memo);
749 if (!attrib) {
750 Py_DECREF(tag);
751 return NULL;
752 }
753 } else {
754 Py_INCREF(Py_None);
755 attrib = Py_None;
756 }
757
Eli Bendersky092af1f2012-03-04 07:14:03 +0200758 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000759
760 Py_DECREF(tag);
761 Py_DECREF(attrib);
762
763 if (!element)
764 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100765
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000766 text = deepcopy(JOIN_OBJ(self->text), memo);
767 if (!text)
768 goto error;
769 Py_DECREF(element->text);
770 element->text = JOIN_SET(text, JOIN_GET(self->text));
771
772 tail = deepcopy(JOIN_OBJ(self->tail), memo);
773 if (!tail)
774 goto error;
775 Py_DECREF(element->tail);
776 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
777
778 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000779 if (element_resize(element, self->extra->length) < 0)
780 goto error;
781
782 for (i = 0; i < self->extra->length; i++) {
783 PyObject* child = deepcopy(self->extra->children[i], memo);
784 if (!child) {
785 element->extra->length = i;
786 goto error;
787 }
788 element->extra->children[i] = child;
789 }
790
791 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000792 }
793
794 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200795 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000796 if (!id)
797 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000798
799 i = PyDict_SetItem(memo, id, (PyObject*) element);
800
801 Py_DECREF(id);
802
803 if (i < 0)
804 goto error;
805
806 return (PyObject*) element;
807
808 error:
809 Py_DECREF(element);
810 return NULL;
811}
812
Martin v. Löwisbce16662012-06-17 10:41:22 +0200813static PyObject*
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200814element_sizeof(PyObject* myself, PyObject* args)
Martin v. Löwisbce16662012-06-17 10:41:22 +0200815{
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200816 ElementObject *self = (ElementObject*)myself;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200817 Py_ssize_t result = sizeof(ElementObject);
818 if (self->extra) {
819 result += sizeof(ElementObjectExtra);
820 if (self->extra->children != self->extra->_children)
821 result += sizeof(PyObject*) * self->extra->allocated;
822 }
823 return PyLong_FromSsize_t(result);
824}
825
/* Keys of the state dictionary exchanged by getstate/setstate. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
832
833/* __getstate__ returns a fabricated instance dict as in the pure-Python
834 * Element implementation, for interoperability/interchangeability. This
835 * makes the pure-Python implementation details an API, but (a) there aren't
836 * any unnecessary structures there; and (b) it buys compatibility with 3.2
837 * pickles. See issue #16076.
838 */
839static PyObject *
840element_getstate(ElementObject *self)
841{
842 int i, noattrib;
843 PyObject *instancedict = NULL, *children;
844
845 /* Build a list of children. */
846 children = PyList_New(self->extra ? self->extra->length : 0);
847 if (!children)
848 return NULL;
849 for (i = 0; i < PyList_GET_SIZE(children); i++) {
850 PyObject *child = self->extra->children[i];
851 Py_INCREF(child);
852 PyList_SET_ITEM(children, i, child);
853 }
854
855 /* Construct the state object. */
856 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
857 if (noattrib)
858 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
859 PICKLED_TAG, self->tag,
860 PICKLED_CHILDREN, children,
861 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700862 PICKLED_TEXT, JOIN_OBJ(self->text),
863 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800864 else
865 instancedict = Py_BuildValue("{sOsOsOsOsO}",
866 PICKLED_TAG, self->tag,
867 PICKLED_CHILDREN, children,
868 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700869 PICKLED_TEXT, JOIN_OBJ(self->text),
870 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800871 if (instancedict) {
872 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800873 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800874 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800875 else {
876 for (i = 0; i < PyList_GET_SIZE(children); i++)
877 Py_DECREF(PyList_GET_ITEM(children, i));
878 Py_DECREF(children);
879
880 return NULL;
881 }
882}
883
884static PyObject *
885element_setstate_from_attributes(ElementObject *self,
886 PyObject *tag,
887 PyObject *attrib,
888 PyObject *text,
889 PyObject *tail,
890 PyObject *children)
891{
892 Py_ssize_t i, nchildren;
893
894 if (!tag) {
895 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
896 return NULL;
897 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800898
899 Py_CLEAR(self->tag);
900 self->tag = tag;
901 Py_INCREF(self->tag);
902
Eli Benderskydd3661e2013-09-13 06:24:25 -0700903 _clear_joined_ptr(&self->text);
904 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
905 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800906
Eli Benderskydd3661e2013-09-13 06:24:25 -0700907 _clear_joined_ptr(&self->tail);
908 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
909 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800910
911 /* Handle ATTRIB and CHILDREN. */
912 if (!children && !attrib)
913 Py_RETURN_NONE;
914
915 /* Compute 'nchildren'. */
916 if (children) {
917 if (!PyList_Check(children)) {
918 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
919 return NULL;
920 }
921 nchildren = PyList_Size(children);
922 }
923 else {
924 nchildren = 0;
925 }
926
927 /* Allocate 'extra'. */
928 if (element_resize(self, nchildren)) {
929 return NULL;
930 }
931 assert(self->extra && self->extra->allocated >= nchildren);
932
933 /* Copy children */
934 for (i = 0; i < nchildren; i++) {
935 self->extra->children[i] = PyList_GET_ITEM(children, i);
936 Py_INCREF(self->extra->children[i]);
937 }
938
939 self->extra->length = nchildren;
940 self->extra->allocated = nchildren;
941
942 /* Stash attrib. */
943 if (attrib) {
944 Py_CLEAR(self->extra->attrib);
945 self->extra->attrib = attrib;
946 Py_INCREF(attrib);
947 }
948
949 Py_RETURN_NONE;
950}
951
952/* __setstate__ for Element instance from the Python implementation.
953 * 'state' should be the instance dict.
954 */
955static PyObject *
956element_setstate_from_Python(ElementObject *self, PyObject *state)
957{
958 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
959 PICKLED_TAIL, PICKLED_CHILDREN, 0};
960 PyObject *args;
961 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800962 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800963
Eli Bendersky698bdb22013-01-10 06:01:06 -0800964 tag = attrib = text = tail = children = NULL;
965 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800966 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800967 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800968
969 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
970 &attrib, &text, &tail, &children))
971 retval = element_setstate_from_attributes(self, tag, attrib, text,
972 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800973 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800974 retval = NULL;
975
976 Py_DECREF(args);
977 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800978}
979
980static PyObject *
981element_setstate(ElementObject *self, PyObject *state)
982{
983 if (!PyDict_CheckExact(state)) {
984 PyErr_Format(PyExc_TypeError,
985 "Don't know how to unpickle \"%.200R\" as an Element",
986 state);
987 return NULL;
988 }
989 else
990 return element_setstate_from_Python(self, state);
991}
992
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000993LOCAL(int)
994checkpath(PyObject* tag)
995{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000996 Py_ssize_t i;
997 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000998
999 /* check if a tag contains an xpath character */
1000
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001001#define PATHCHAR(ch) \
1002 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001003
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001004 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1006 void *data = PyUnicode_DATA(tag);
1007 unsigned int kind = PyUnicode_KIND(tag);
1008 for (i = 0; i < len; i++) {
1009 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1010 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001011 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001012 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001013 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001014 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001015 return 1;
1016 }
1017 return 0;
1018 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001019 if (PyBytes_Check(tag)) {
1020 char *p = PyBytes_AS_STRING(tag);
1021 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001022 if (p[i] == '{')
1023 check = 0;
1024 else if (p[i] == '}')
1025 check = 1;
1026 else if (check && PATHCHAR(p[i]))
1027 return 1;
1028 }
1029 return 0;
1030 }
1031
1032 return 1; /* unknown type; might be path expression */
1033}
1034
1035static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001036element_extend(ElementObject* self, PyObject* args)
1037{
1038 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001039 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001040
1041 PyObject* seq_in;
1042 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1043 return NULL;
1044
1045 seq = PySequence_Fast(seq_in, "");
1046 if (!seq) {
1047 PyErr_Format(
1048 PyExc_TypeError,
1049 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1050 );
1051 return NULL;
1052 }
1053
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001054 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001055 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001056 Py_INCREF(element);
1057 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001058 PyErr_Format(
1059 PyExc_TypeError,
1060 "expected an Element, not \"%.200s\"",
1061 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001062 Py_DECREF(seq);
1063 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001064 return NULL;
1065 }
1066
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001067 if (element_add_subelement(self, element) < 0) {
1068 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001069 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001070 return NULL;
1071 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001072 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001073 }
1074
1075 Py_DECREF(seq);
1076
1077 Py_RETURN_NONE;
1078}
1079
1080static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001081element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001082{
1083 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001084 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001085 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001086 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001087 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001088
Eli Bendersky737b1732012-05-29 06:02:56 +03001089 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1090 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001091 return NULL;
1092
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001093 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001094 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001095 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001096 st->elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001097 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001098 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001099
1100 if (!self->extra)
1101 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001102
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001103 for (i = 0; i < self->extra->length; i++) {
1104 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001105 int rc;
1106 if (!Element_CheckExact(item))
1107 continue;
1108 Py_INCREF(item);
1109 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1110 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001111 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001112 Py_DECREF(item);
1113 if (rc < 0)
1114 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001115 }
1116
1117 Py_RETURN_NONE;
1118}
1119
1120static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001121element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001122{
1123 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001124 PyObject* tag;
1125 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001126 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001127 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001128 static char *kwlist[] = {"path", "default", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001129 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001130
Eli Bendersky737b1732012-05-29 06:02:56 +03001131 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1132 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001133 return NULL;
1134
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001135 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001136 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001137 st->elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001138 );
1139
1140 if (!self->extra) {
1141 Py_INCREF(default_value);
1142 return default_value;
1143 }
1144
1145 for (i = 0; i < self->extra->length; i++) {
1146 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001147 int rc;
1148 if (!Element_CheckExact(item))
1149 continue;
1150 Py_INCREF(item);
1151 rc = PyObject_RichCompareBool(item->tag, tag, Py_EQ);
1152 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001153 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001154 if (text == Py_None) {
1155 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001156 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001157 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001158 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001159 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001160 return text;
1161 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001162 Py_DECREF(item);
1163 if (rc < 0)
1164 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001165 }
1166
1167 Py_INCREF(default_value);
1168 return default_value;
1169}
1170
1171static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001172element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001173{
1174 int i;
1175 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001176 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001177 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001178 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001179 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001180
Eli Bendersky737b1732012-05-29 06:02:56 +03001181 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1182 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001183 return NULL;
1184
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001185 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001186 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001187 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001188 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001189 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001190 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001191
1192 out = PyList_New(0);
1193 if (!out)
1194 return NULL;
1195
1196 if (!self->extra)
1197 return out;
1198
1199 for (i = 0; i < self->extra->length; i++) {
1200 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001201 int rc;
1202 if (!Element_CheckExact(item))
1203 continue;
1204 Py_INCREF(item);
1205 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1206 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1207 Py_DECREF(item);
1208 Py_DECREF(out);
1209 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001210 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001211 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001212 }
1213
1214 return out;
1215}
1216
1217static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001218element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001219{
1220 PyObject* tag;
1221 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001222 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001223 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001224 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001225
Eli Bendersky737b1732012-05-29 06:02:56 +03001226 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
Eli Bendersky163d7f02013-11-24 06:55:04 -08001227 &tag, &namespaces)) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001228 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -08001229 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001230
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001231 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001232 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001233}
1234
1235static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001236element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001237{
1238 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001239 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001240
1241 PyObject* key;
1242 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001243
1244 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1245 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001246 return NULL;
1247
1248 if (!self->extra || self->extra->attrib == Py_None)
1249 value = default_value;
1250 else {
1251 value = PyDict_GetItem(self->extra->attrib, key);
1252 if (!value)
1253 value = default_value;
1254 }
1255
1256 Py_INCREF(value);
1257 return value;
1258}
1259
1260static PyObject*
1261element_getchildren(ElementObject* self, PyObject* args)
1262{
1263 int i;
1264 PyObject* list;
1265
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001266 /* FIXME: report as deprecated? */
1267
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001268 if (!PyArg_ParseTuple(args, ":getchildren"))
1269 return NULL;
1270
1271 if (!self->extra)
1272 return PyList_New(0);
1273
1274 list = PyList_New(self->extra->length);
1275 if (!list)
1276 return NULL;
1277
1278 for (i = 0; i < self->extra->length; i++) {
1279 PyObject* item = self->extra->children[i];
1280 Py_INCREF(item);
1281 PyList_SET_ITEM(list, i, item);
1282 }
1283
1284 return list;
1285}
1286
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001287
Eli Bendersky64d11e62012-06-15 07:42:50 +03001288static PyObject *
1289create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1290
1291
1292static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001293element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001294{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001295 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001296 static char* kwlist[] = {"tag", 0};
1297
1298 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001299 return NULL;
1300
Eli Bendersky64d11e62012-06-15 07:42:50 +03001301 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001302}
1303
1304
1305static PyObject*
1306element_itertext(ElementObject* self, PyObject* args)
1307{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001308 if (!PyArg_ParseTuple(args, ":itertext"))
1309 return NULL;
1310
Eli Bendersky64d11e62012-06-15 07:42:50 +03001311 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001312}
1313
Eli Bendersky64d11e62012-06-15 07:42:50 +03001314
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001315static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001316element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001317{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001318 ElementObject* self = (ElementObject*) self_;
1319
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001320 if (!self->extra || index < 0 || index >= self->extra->length) {
1321 PyErr_SetString(
1322 PyExc_IndexError,
1323 "child index out of range"
1324 );
1325 return NULL;
1326 }
1327
1328 Py_INCREF(self->extra->children[index]);
1329 return self->extra->children[index];
1330}
1331
1332static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001333element_insert(ElementObject* self, PyObject* args)
1334{
1335 int i;
1336
1337 int index;
1338 PyObject* element;
1339 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1340 &Element_Type, &element))
1341 return NULL;
1342
Victor Stinner5f0af232013-07-11 23:01:36 +02001343 if (!self->extra) {
1344 if (create_extra(self, NULL) < 0)
1345 return NULL;
1346 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001347
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001348 if (index < 0) {
1349 index += self->extra->length;
1350 if (index < 0)
1351 index = 0;
1352 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001353 if (index > self->extra->length)
1354 index = self->extra->length;
1355
1356 if (element_resize(self, 1) < 0)
1357 return NULL;
1358
1359 for (i = self->extra->length; i > index; i--)
1360 self->extra->children[i] = self->extra->children[i-1];
1361
1362 Py_INCREF(element);
1363 self->extra->children[index] = element;
1364
1365 self->extra->length++;
1366
1367 Py_RETURN_NONE;
1368}
1369
1370static PyObject*
1371element_items(ElementObject* self, PyObject* args)
1372{
1373 if (!PyArg_ParseTuple(args, ":items"))
1374 return NULL;
1375
1376 if (!self->extra || self->extra->attrib == Py_None)
1377 return PyList_New(0);
1378
1379 return PyDict_Items(self->extra->attrib);
1380}
1381
1382static PyObject*
1383element_keys(ElementObject* self, PyObject* args)
1384{
1385 if (!PyArg_ParseTuple(args, ":keys"))
1386 return NULL;
1387
1388 if (!self->extra || self->extra->attrib == Py_None)
1389 return PyList_New(0);
1390
1391 return PyDict_Keys(self->extra->attrib);
1392}
1393
Martin v. Löwis18e16552006-02-15 17:27:45 +00001394static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001395element_length(ElementObject* self)
1396{
1397 if (!self->extra)
1398 return 0;
1399
1400 return self->extra->length;
1401}
1402
1403static PyObject*
1404element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1405{
1406 PyObject* elem;
1407
1408 PyObject* tag;
1409 PyObject* attrib;
1410 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1411 return NULL;
1412
1413 attrib = PyDict_Copy(attrib);
1414 if (!attrib)
1415 return NULL;
1416
Eli Bendersky092af1f2012-03-04 07:14:03 +02001417 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001418
1419 Py_DECREF(attrib);
1420
1421 return elem;
1422}
1423
1424static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001425element_remove(ElementObject* self, PyObject* args)
1426{
1427 int i;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001428 int rc;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001429 PyObject* element;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001430 PyObject* found;
1431
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001432 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1433 return NULL;
1434
1435 if (!self->extra) {
1436 /* element has no children, so raise exception */
1437 PyErr_SetString(
1438 PyExc_ValueError,
1439 "list.remove(x): x not in list"
1440 );
1441 return NULL;
1442 }
1443
1444 for (i = 0; i < self->extra->length; i++) {
1445 if (self->extra->children[i] == element)
1446 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001447 rc = PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ);
1448 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001449 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001450 if (rc < 0)
1451 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001452 }
1453
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001454 if (i >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001455 /* element is not in children, so raise exception */
1456 PyErr_SetString(
1457 PyExc_ValueError,
1458 "list.remove(x): x not in list"
1459 );
1460 return NULL;
1461 }
1462
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001463 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001464
1465 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001466 for (; i < self->extra->length; i++)
1467 self->extra->children[i] = self->extra->children[i+1];
1468
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001469 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001470 Py_RETURN_NONE;
1471}
1472
1473static PyObject*
1474element_repr(ElementObject* self)
1475{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001476 if (self->tag)
1477 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1478 else
1479 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001480}
1481
1482static PyObject*
1483element_set(ElementObject* self, PyObject* args)
1484{
1485 PyObject* attrib;
1486
1487 PyObject* key;
1488 PyObject* value;
1489 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1490 return NULL;
1491
Victor Stinner5f0af232013-07-11 23:01:36 +02001492 if (!self->extra) {
1493 if (create_extra(self, NULL) < 0)
1494 return NULL;
1495 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001496
1497 attrib = element_get_attrib(self);
1498 if (!attrib)
1499 return NULL;
1500
1501 if (PyDict_SetItem(attrib, key, value) < 0)
1502 return NULL;
1503
1504 Py_RETURN_NONE;
1505}
1506
1507static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001508element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001509{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001510 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001511 int i;
1512 PyObject* old;
1513
1514 if (!self->extra || index < 0 || index >= self->extra->length) {
1515 PyErr_SetString(
1516 PyExc_IndexError,
1517 "child assignment index out of range");
1518 return -1;
1519 }
1520
1521 old = self->extra->children[index];
1522
1523 if (item) {
1524 Py_INCREF(item);
1525 self->extra->children[index] = item;
1526 } else {
1527 self->extra->length--;
1528 for (i = index; i < self->extra->length; i++)
1529 self->extra->children[i] = self->extra->children[i+1];
1530 }
1531
1532 Py_DECREF(old);
1533
1534 return 0;
1535}
1536
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001537static PyObject*
1538element_subscr(PyObject* self_, PyObject* item)
1539{
1540 ElementObject* self = (ElementObject*) self_;
1541
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001542 if (PyIndex_Check(item)) {
1543 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001544
1545 if (i == -1 && PyErr_Occurred()) {
1546 return NULL;
1547 }
1548 if (i < 0 && self->extra)
1549 i += self->extra->length;
1550 return element_getitem(self_, i);
1551 }
1552 else if (PySlice_Check(item)) {
1553 Py_ssize_t start, stop, step, slicelen, cur, i;
1554 PyObject* list;
1555
1556 if (!self->extra)
1557 return PyList_New(0);
1558
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001559 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001560 self->extra->length,
1561 &start, &stop, &step, &slicelen) < 0) {
1562 return NULL;
1563 }
1564
1565 if (slicelen <= 0)
1566 return PyList_New(0);
1567 else {
1568 list = PyList_New(slicelen);
1569 if (!list)
1570 return NULL;
1571
1572 for (cur = start, i = 0; i < slicelen;
1573 cur += step, i++) {
1574 PyObject* item = self->extra->children[cur];
1575 Py_INCREF(item);
1576 PyList_SET_ITEM(list, i, item);
1577 }
1578
1579 return list;
1580 }
1581 }
1582 else {
1583 PyErr_SetString(PyExc_TypeError,
1584 "element indices must be integers");
1585 return NULL;
1586 }
1587}
1588
1589static int
1590element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1591{
1592 ElementObject* self = (ElementObject*) self_;
1593
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001594 if (PyIndex_Check(item)) {
1595 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001596
1597 if (i == -1 && PyErr_Occurred()) {
1598 return -1;
1599 }
1600 if (i < 0 && self->extra)
1601 i += self->extra->length;
1602 return element_setitem(self_, i, value);
1603 }
1604 else if (PySlice_Check(item)) {
1605 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1606
1607 PyObject* recycle = NULL;
1608 PyObject* seq = NULL;
1609
Victor Stinner5f0af232013-07-11 23:01:36 +02001610 if (!self->extra) {
1611 if (create_extra(self, NULL) < 0)
1612 return -1;
1613 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001614
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001615 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001616 self->extra->length,
1617 &start, &stop, &step, &slicelen) < 0) {
1618 return -1;
1619 }
1620
Eli Bendersky865756a2012-03-09 13:38:15 +02001621 if (value == NULL) {
1622 /* Delete slice */
1623 size_t cur;
1624 Py_ssize_t i;
1625
1626 if (slicelen <= 0)
1627 return 0;
1628
1629 /* Since we're deleting, the direction of the range doesn't matter,
1630 * so for simplicity make it always ascending.
1631 */
1632 if (step < 0) {
1633 stop = start + 1;
1634 start = stop + step * (slicelen - 1) - 1;
1635 step = -step;
1636 }
1637
1638 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1639
1640 /* recycle is a list that will contain all the children
1641 * scheduled for removal.
1642 */
1643 if (!(recycle = PyList_New(slicelen))) {
1644 PyErr_NoMemory();
1645 return -1;
1646 }
1647
1648 /* This loop walks over all the children that have to be deleted,
1649 * with cur pointing at them. num_moved is the amount of children
1650 * until the next deleted child that have to be "shifted down" to
1651 * occupy the deleted's places.
1652 * Note that in the ith iteration, shifting is done i+i places down
1653 * because i children were already removed.
1654 */
1655 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1656 /* Compute how many children have to be moved, clipping at the
1657 * list end.
1658 */
1659 Py_ssize_t num_moved = step - 1;
1660 if (cur + step >= (size_t)self->extra->length) {
1661 num_moved = self->extra->length - cur - 1;
1662 }
1663
1664 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1665
1666 memmove(
1667 self->extra->children + cur - i,
1668 self->extra->children + cur + 1,
1669 num_moved * sizeof(PyObject *));
1670 }
1671
1672 /* Leftover "tail" after the last removed child */
1673 cur = start + (size_t)slicelen * step;
1674 if (cur < (size_t)self->extra->length) {
1675 memmove(
1676 self->extra->children + cur - slicelen,
1677 self->extra->children + cur,
1678 (self->extra->length - cur) * sizeof(PyObject *));
1679 }
1680
1681 self->extra->length -= slicelen;
1682
1683 /* Discard the recycle list with all the deleted sub-elements */
1684 Py_XDECREF(recycle);
1685 return 0;
1686 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001687 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001688 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001689 seq = PySequence_Fast(value, "");
1690 if (!seq) {
1691 PyErr_Format(
1692 PyExc_TypeError,
1693 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1694 );
1695 return -1;
1696 }
1697 newlen = PySequence_Size(seq);
1698 }
1699
1700 if (step != 1 && newlen != slicelen)
1701 {
1702 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001703 "attempt to assign sequence of size %zd "
1704 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001705 newlen, slicelen
1706 );
1707 return -1;
1708 }
1709
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001710 /* Resize before creating the recycle bin, to prevent refleaks. */
1711 if (newlen > slicelen) {
1712 if (element_resize(self, newlen - slicelen) < 0) {
1713 if (seq) {
1714 Py_DECREF(seq);
1715 }
1716 return -1;
1717 }
1718 }
1719
1720 if (slicelen > 0) {
1721 /* to avoid recursive calls to this method (via decref), move
1722 old items to the recycle bin here, and get rid of them when
1723 we're done modifying the element */
1724 recycle = PyList_New(slicelen);
1725 if (!recycle) {
1726 if (seq) {
1727 Py_DECREF(seq);
1728 }
1729 return -1;
1730 }
1731 for (cur = start, i = 0; i < slicelen;
1732 cur += step, i++)
1733 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1734 }
1735
1736 if (newlen < slicelen) {
1737 /* delete slice */
1738 for (i = stop; i < self->extra->length; i++)
1739 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1740 } else if (newlen > slicelen) {
1741 /* insert slice */
1742 for (i = self->extra->length-1; i >= stop; i--)
1743 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1744 }
1745
1746 /* replace the slice */
1747 for (cur = start, i = 0; i < newlen;
1748 cur += step, i++) {
1749 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1750 Py_INCREF(element);
1751 self->extra->children[cur] = element;
1752 }
1753
1754 self->extra->length += newlen - slicelen;
1755
1756 if (seq) {
1757 Py_DECREF(seq);
1758 }
1759
1760 /* discard the recycle bin, and everything in it */
1761 Py_XDECREF(recycle);
1762
1763 return 0;
1764 }
1765 else {
1766 PyErr_SetString(PyExc_TypeError,
1767 "element indices must be integers");
1768 return -1;
1769 }
1770}
1771
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001772static PyMethodDef element_methods[] = {
1773
Eli Bendersky0192ba32012-03-30 16:38:33 +03001774 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001775
Eli Benderskya8736902013-01-05 06:26:39 -08001776 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001777 {"set", (PyCFunction) element_set, METH_VARARGS},
1778
Eli Bendersky737b1732012-05-29 06:02:56 +03001779 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1780 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1781 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001782
1783 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001784 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001785 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1786 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1787
Eli Benderskya8736902013-01-05 06:26:39 -08001788 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001789 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001790 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001791
Eli Benderskya8736902013-01-05 06:26:39 -08001792 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001793 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1794
1795 {"items", (PyCFunction) element_items, METH_VARARGS},
1796 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1797
1798 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1799
1800 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1801 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001802 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001803 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1804 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001805
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001806 {NULL, NULL}
1807};
1808
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001809static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001810element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001811{
1812 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001813 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001814
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001815 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001816 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001817
Alexander Belopolskye239d232010-12-08 23:31:48 +00001818 if (name == NULL)
1819 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001820
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001821 /* handle common attributes first */
1822 if (strcmp(name, "tag") == 0) {
1823 res = self->tag;
1824 Py_INCREF(res);
1825 return res;
1826 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001827 res = element_get_text(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02001828 Py_XINCREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001829 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001830 }
1831
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001832 /* methods */
1833 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1834 if (res)
1835 return res;
1836
1837 /* less common attributes */
1838 if (strcmp(name, "tail") == 0) {
1839 PyErr_Clear();
1840 res = element_get_tail(self);
1841 } else if (strcmp(name, "attrib") == 0) {
1842 PyErr_Clear();
Victor Stinner5f0af232013-07-11 23:01:36 +02001843 if (!self->extra) {
1844 if (create_extra(self, NULL) < 0)
1845 return NULL;
1846 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001847 res = element_get_attrib(self);
1848 }
1849
1850 if (!res)
1851 return NULL;
1852
1853 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001854 return res;
1855}
1856
Eli Benderskyef9683b2013-05-18 07:52:34 -07001857static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001858element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001859{
Eli Benderskyb20df952012-05-20 06:33:29 +03001860 char *name = "";
1861 if (PyUnicode_Check(nameobj))
1862 name = _PyUnicode_AsString(nameobj);
Victor Stinner4d463432013-07-11 23:05:03 +02001863 if (name == NULL)
Eli Benderskyef9683b2013-05-18 07:52:34 -07001864 return -1;
Victor Stinner4d463432013-07-11 23:05:03 +02001865
1866 if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001867 Py_DECREF(self->tag);
1868 self->tag = value;
1869 Py_INCREF(self->tag);
1870 } else if (strcmp(name, "text") == 0) {
1871 Py_DECREF(JOIN_OBJ(self->text));
1872 self->text = value;
1873 Py_INCREF(self->text);
1874 } else if (strcmp(name, "tail") == 0) {
1875 Py_DECREF(JOIN_OBJ(self->tail));
1876 self->tail = value;
1877 Py_INCREF(self->tail);
1878 } else if (strcmp(name, "attrib") == 0) {
Victor Stinner5f0af232013-07-11 23:01:36 +02001879 if (!self->extra) {
1880 if (create_extra(self, NULL) < 0)
1881 return -1;
1882 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001883 Py_DECREF(self->extra->attrib);
1884 self->extra->attrib = value;
1885 Py_INCREF(self->extra->attrib);
1886 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001887 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001888 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001889 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001890 }
1891
Eli Benderskyef9683b2013-05-18 07:52:34 -07001892 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001893}
1894
1895static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001896 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001897 0, /* sq_concat */
1898 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001899 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001900 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001901 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001902 0,
1903};
1904
1905static PyMappingMethods element_as_mapping = {
1906 (lenfunc) element_length,
1907 (binaryfunc) element_subscr,
1908 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001909};
1910
Neal Norwitz227b5332006-03-22 09:28:35 +00001911static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001912 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001913 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001914 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001915 (destructor)element_dealloc, /* tp_dealloc */
1916 0, /* tp_print */
1917 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001918 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001919 0, /* tp_reserved */
1920 (reprfunc)element_repr, /* tp_repr */
1921 0, /* tp_as_number */
1922 &element_as_sequence, /* tp_as_sequence */
1923 &element_as_mapping, /* tp_as_mapping */
1924 0, /* tp_hash */
1925 0, /* tp_call */
1926 0, /* tp_str */
1927 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001928 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001929 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001930 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1931 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001932 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001933 (traverseproc)element_gc_traverse, /* tp_traverse */
1934 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001935 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001936 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001937 0, /* tp_iter */
1938 0, /* tp_iternext */
1939 element_methods, /* tp_methods */
1940 0, /* tp_members */
1941 0, /* tp_getset */
1942 0, /* tp_base */
1943 0, /* tp_dict */
1944 0, /* tp_descr_get */
1945 0, /* tp_descr_set */
1946 0, /* tp_dictoffset */
1947 (initproc)element_init, /* tp_init */
1948 PyType_GenericAlloc, /* tp_alloc */
1949 element_new, /* tp_new */
1950 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001951};
1952
Eli Bendersky64d11e62012-06-15 07:42:50 +03001953/******************************* Element iterator ****************************/
1954
1955/* ElementIterObject represents the iteration state over an XML element in
1956 * pre-order traversal. To keep track of which sub-element should be returned
1957 * next, a stack of parents is maintained. This is a standard stack-based
1958 * iterative pre-order traversal of a tree.
1959 * The stack is managed using a single-linked list starting at parent_stack.
1960 * Each stack node contains the saved parent to which we should return after
1961 * the current one is exhausted, and the next child to examine in that parent.
1962 */
1963typedef struct ParentLocator_t {
1964 ElementObject *parent;
1965 Py_ssize_t child_index;
1966 struct ParentLocator_t *next;
1967} ParentLocator;
1968
1969typedef struct {
1970 PyObject_HEAD
1971 ParentLocator *parent_stack;
1972 ElementObject *root_element;
1973 PyObject *sought_tag;
1974 int root_done;
1975 int gettext;
1976} ElementIterObject;
1977
1978
1979static void
1980elementiter_dealloc(ElementIterObject *it)
1981{
1982 ParentLocator *p = it->parent_stack;
1983 while (p) {
1984 ParentLocator *temp = p;
1985 Py_XDECREF(p->parent);
1986 p = p->next;
1987 PyObject_Free(temp);
1988 }
1989
1990 Py_XDECREF(it->sought_tag);
1991 Py_XDECREF(it->root_element);
1992
1993 PyObject_GC_UnTrack(it);
1994 PyObject_GC_Del(it);
1995}
1996
1997static int
1998elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1999{
2000 ParentLocator *p = it->parent_stack;
2001 while (p) {
2002 Py_VISIT(p->parent);
2003 p = p->next;
2004 }
2005
2006 Py_VISIT(it->root_element);
2007 Py_VISIT(it->sought_tag);
2008 return 0;
2009}
2010
2011/* Helper function for elementiter_next. Add a new parent to the parent stack.
2012 */
2013static ParentLocator *
2014parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
2015{
2016 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
2017 if (new_node) {
2018 new_node->parent = parent;
2019 Py_INCREF(parent);
2020 new_node->child_index = 0;
2021 new_node->next = stack;
2022 }
2023 return new_node;
2024}
2025
2026static PyObject *
2027elementiter_next(ElementIterObject *it)
2028{
2029 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002030 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002031 * A short note on gettext: this function serves both the iter() and
2032 * itertext() methods to avoid code duplication. However, there are a few
2033 * small differences in the way these iterations work. Namely:
2034 * - itertext() only yields text from nodes that have it, and continues
2035 * iterating when a node doesn't have text (so it doesn't return any
2036 * node like iter())
2037 * - itertext() also has to handle tail, after finishing with all the
2038 * children of a node.
2039 */
Eli Bendersky113da642012-06-15 07:52:49 +03002040 ElementObject *cur_parent;
2041 Py_ssize_t child_index;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002042 int rc;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002043
2044 while (1) {
2045 /* Handle the case reached in the beginning and end of iteration, where
2046 * the parent stack is empty. The root_done flag gives us indication
2047 * whether we've just started iterating (so root_done is 0), in which
2048 * case the root is returned. If root_done is 1 and we're here, the
2049 * iterator is exhausted.
2050 */
2051 if (!it->parent_stack->parent) {
2052 if (it->root_done) {
2053 PyErr_SetNone(PyExc_StopIteration);
2054 return NULL;
2055 } else {
2056 it->parent_stack = parent_stack_push_new(it->parent_stack,
2057 it->root_element);
2058 if (!it->parent_stack) {
2059 PyErr_NoMemory();
2060 return NULL;
2061 }
2062
2063 it->root_done = 1;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002064 rc = (it->sought_tag == Py_None);
2065 if (!rc) {
2066 rc = PyObject_RichCompareBool(it->root_element->tag,
2067 it->sought_tag, Py_EQ);
2068 if (rc < 0)
2069 return NULL;
2070 }
2071 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002072 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002073 PyObject *text = element_get_text(it->root_element);
2074 if (!text)
2075 return NULL;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002076 rc = PyObject_IsTrue(text);
2077 if (rc < 0)
2078 return NULL;
2079 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002080 Py_INCREF(text);
2081 return text;
2082 }
2083 } else {
2084 Py_INCREF(it->root_element);
2085 return (PyObject *)it->root_element;
2086 }
2087 }
2088 }
2089 }
2090
2091 /* See if there are children left to traverse in the current parent. If
2092 * yes, visit the next child. If not, pop the stack and try again.
2093 */
Eli Bendersky113da642012-06-15 07:52:49 +03002094 cur_parent = it->parent_stack->parent;
2095 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002096 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2097 ElementObject *child = (ElementObject *)
2098 cur_parent->extra->children[child_index];
2099 it->parent_stack->child_index++;
2100 it->parent_stack = parent_stack_push_new(it->parent_stack,
2101 child);
2102 if (!it->parent_stack) {
2103 PyErr_NoMemory();
2104 return NULL;
2105 }
2106
2107 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002108 PyObject *text = element_get_text(child);
2109 if (!text)
2110 return NULL;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002111 rc = PyObject_IsTrue(text);
2112 if (rc < 0)
2113 return NULL;
2114 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002115 Py_INCREF(text);
2116 return text;
2117 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002118 } else {
2119 rc = (it->sought_tag == Py_None);
2120 if (!rc) {
2121 rc = PyObject_RichCompareBool(child->tag,
2122 it->sought_tag, Py_EQ);
2123 if (rc < 0)
2124 return NULL;
2125 }
2126 if (rc) {
2127 Py_INCREF(child);
2128 return (PyObject *)child;
2129 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002130 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002131 }
2132 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002133 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002134 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002135 if (it->gettext) {
2136 tail = element_get_tail(cur_parent);
2137 if (!tail)
2138 return NULL;
2139 }
2140 else
2141 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002142 Py_XDECREF(it->parent_stack->parent);
2143 PyObject_Free(it->parent_stack);
2144 it->parent_stack = next;
2145
2146 /* Note that extra condition on it->parent_stack->parent here;
2147 * this is because itertext() is supposed to only return *inner*
2148 * text, not text following the element it began iteration with.
2149 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002150 if (it->parent_stack->parent) {
2151 rc = PyObject_IsTrue(tail);
2152 if (rc < 0)
2153 return NULL;
2154 if (rc) {
2155 Py_INCREF(tail);
2156 return tail;
2157 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002158 }
2159 }
2160 }
2161
2162 return NULL;
2163}
2164
2165
2166static PyTypeObject ElementIter_Type = {
2167 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002168 /* Using the module's name since the pure-Python implementation does not
2169 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002170 "_elementtree._element_iterator", /* tp_name */
2171 sizeof(ElementIterObject), /* tp_basicsize */
2172 0, /* tp_itemsize */
2173 /* methods */
2174 (destructor)elementiter_dealloc, /* tp_dealloc */
2175 0, /* tp_print */
2176 0, /* tp_getattr */
2177 0, /* tp_setattr */
2178 0, /* tp_reserved */
2179 0, /* tp_repr */
2180 0, /* tp_as_number */
2181 0, /* tp_as_sequence */
2182 0, /* tp_as_mapping */
2183 0, /* tp_hash */
2184 0, /* tp_call */
2185 0, /* tp_str */
2186 0, /* tp_getattro */
2187 0, /* tp_setattro */
2188 0, /* tp_as_buffer */
2189 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2190 0, /* tp_doc */
2191 (traverseproc)elementiter_traverse, /* tp_traverse */
2192 0, /* tp_clear */
2193 0, /* tp_richcompare */
2194 0, /* tp_weaklistoffset */
2195 PyObject_SelfIter, /* tp_iter */
2196 (iternextfunc)elementiter_next, /* tp_iternext */
2197 0, /* tp_methods */
2198 0, /* tp_members */
2199 0, /* tp_getset */
2200 0, /* tp_base */
2201 0, /* tp_dict */
2202 0, /* tp_descr_get */
2203 0, /* tp_descr_set */
2204 0, /* tp_dictoffset */
2205 0, /* tp_init */
2206 0, /* tp_alloc */
2207 0, /* tp_new */
2208};
2209
2210
2211static PyObject *
2212create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2213{
2214 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002215
2216 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2217 if (!it)
2218 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002219
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002220 if (PyUnicode_Check(tag)) {
2221 if (PyUnicode_READY(tag) < 0)
2222 return NULL;
2223 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
2224 tag = Py_None;
2225 }
2226 else if (PyBytes_Check(tag)) {
2227 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
2228 tag = Py_None;
2229 }
Victor Stinner4d463432013-07-11 23:05:03 +02002230
2231 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002232 it->sought_tag = tag;
2233 it->root_done = 0;
2234 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002235 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002236 it->root_element = self;
2237
Eli Bendersky64d11e62012-06-15 07:42:50 +03002238 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002239
2240 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2241 if (it->parent_stack == NULL) {
2242 Py_DECREF(it);
2243 PyErr_NoMemory();
2244 return NULL;
2245 }
2246 it->parent_stack->parent = NULL;
2247 it->parent_stack->child_index = 0;
2248 it->parent_stack->next = NULL;
2249
Eli Bendersky64d11e62012-06-15 07:42:50 +03002250 return (PyObject *)it;
2251}
2252
2253
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002254/* ==================================================================== */
2255/* the tree builder type */
2256
2257typedef struct {
2258 PyObject_HEAD
2259
Eli Bendersky58d548d2012-05-29 15:45:16 +03002260 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002261
Antoine Pitrouee329312012-10-04 19:53:29 +02002262 PyObject *this; /* current node */
2263 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002264
Eli Bendersky58d548d2012-05-29 15:45:16 +03002265 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002266
Eli Bendersky58d548d2012-05-29 15:45:16 +03002267 PyObject *stack; /* element stack */
2268 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002269
Eli Bendersky48d358b2012-05-30 17:57:50 +03002270 PyObject *element_factory;
2271
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002272 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002273 PyObject *events; /* list of events, or NULL if not collecting */
2274 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2275 PyObject *end_event_obj;
2276 PyObject *start_ns_event_obj;
2277 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002278} TreeBuilderObject;
2279
/* True only when op's type is exactly TreeBuilder_Type (subclasses fail). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002281
2282/* -------------------------------------------------------------------- */
2283/* constructor and destructor */
2284
Eli Bendersky58d548d2012-05-29 15:45:16 +03002285static PyObject *
2286treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002287{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002288 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2289 if (t != NULL) {
2290 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002291
Eli Bendersky58d548d2012-05-29 15:45:16 +03002292 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002293 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002294 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002295 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002296
Eli Bendersky58d548d2012-05-29 15:45:16 +03002297 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002298 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002299 t->stack = PyList_New(20);
2300 if (!t->stack) {
2301 Py_DECREF(t->this);
2302 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002303 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002304 return NULL;
2305 }
2306 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002307
Eli Bendersky58d548d2012-05-29 15:45:16 +03002308 t->events = NULL;
2309 t->start_event_obj = t->end_event_obj = NULL;
2310 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2311 }
2312 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002313}
2314
Eli Bendersky58d548d2012-05-29 15:45:16 +03002315static int
2316treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002317{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002318 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002319 PyObject *element_factory = NULL;
2320 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002321 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002322
2323 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2324 &element_factory)) {
2325 return -1;
2326 }
2327
2328 if (element_factory) {
2329 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002330 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002331 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002332 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002333 }
2334
Eli Bendersky58d548d2012-05-29 15:45:16 +03002335 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002336}
2337
Eli Bendersky48d358b2012-05-30 17:57:50 +03002338static int
2339treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2340{
2341 Py_VISIT(self->root);
2342 Py_VISIT(self->this);
2343 Py_VISIT(self->last);
2344 Py_VISIT(self->data);
2345 Py_VISIT(self->stack);
2346 Py_VISIT(self->element_factory);
2347 return 0;
2348}
2349
2350static int
2351treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002352{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002353 Py_CLEAR(self->end_ns_event_obj);
2354 Py_CLEAR(self->start_ns_event_obj);
2355 Py_CLEAR(self->end_event_obj);
2356 Py_CLEAR(self->start_event_obj);
2357 Py_CLEAR(self->events);
2358 Py_CLEAR(self->stack);
2359 Py_CLEAR(self->data);
2360 Py_CLEAR(self->last);
2361 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002362 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002363 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002364 return 0;
2365}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002366
Eli Bendersky48d358b2012-05-30 17:57:50 +03002367static void
2368treebuilder_dealloc(TreeBuilderObject *self)
2369{
2370 PyObject_GC_UnTrack(self);
2371 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002372 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002373}
2374
2375/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002376/* helpers for handling of arbitrary element-like objects */
2377
2378static int
2379treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2380 PyObject **dest, _Py_Identifier *name)
2381{
2382 if (Element_CheckExact(element)) {
2383 Py_DECREF(JOIN_OBJ(*dest));
2384 *dest = JOIN_SET(data, PyList_CheckExact(data));
2385 return 0;
2386 }
2387 else {
2388 PyObject *joined = list_join(data);
2389 int r;
2390 if (joined == NULL)
2391 return -1;
2392 r = _PyObject_SetAttrId(element, name, joined);
2393 Py_DECREF(joined);
2394 return r;
2395 }
2396}
2397
2398/* These two functions steal a reference to data */
2399static int
2400treebuilder_set_element_text(PyObject *element, PyObject *data)
2401{
2402 _Py_IDENTIFIER(text);
2403 return treebuilder_set_element_text_or_tail(
2404 element, data, &((ElementObject *) element)->text, &PyId_text);
2405}
2406
2407static int
2408treebuilder_set_element_tail(PyObject *element, PyObject *data)
2409{
2410 _Py_IDENTIFIER(tail);
2411 return treebuilder_set_element_text_or_tail(
2412 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2413}
2414
2415static int
2416treebuilder_add_subelement(PyObject *element, PyObject *child)
2417{
2418 _Py_IDENTIFIER(append);
2419 if (Element_CheckExact(element)) {
2420 ElementObject *elem = (ElementObject *) element;
2421 return element_add_subelement(elem, child);
2422 }
2423 else {
2424 PyObject *res;
2425 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2426 if (res == NULL)
2427 return -1;
2428 Py_DECREF(res);
2429 return 0;
2430 }
2431}
2432
2433/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002434/* handlers */
2435
2436LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002437treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2438 PyObject* attrib)
2439{
2440 PyObject* node;
2441 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002442 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002443
2444 if (self->data) {
2445 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002446 if (treebuilder_set_element_text(self->last, self->data))
2447 return NULL;
2448 }
2449 else {
2450 if (treebuilder_set_element_tail(self->last, self->data))
2451 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002452 }
2453 self->data = NULL;
2454 }
2455
Eli Bendersky08231a92013-05-18 15:47:16 -07002456 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002457 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2458 } else {
2459 node = create_new_element(tag, attrib);
2460 }
2461 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002462 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002463 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002464
Antoine Pitrouee329312012-10-04 19:53:29 +02002465 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002466
2467 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002468 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002469 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002470 } else {
2471 if (self->root) {
2472 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002473 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002474 "multiple elements on top level"
2475 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002476 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002477 }
2478 Py_INCREF(node);
2479 self->root = node;
2480 }
2481
2482 if (self->index < PyList_GET_SIZE(self->stack)) {
2483 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002484 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002485 Py_INCREF(this);
2486 } else {
2487 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002488 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002489 }
2490 self->index++;
2491
2492 Py_DECREF(this);
2493 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002494 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002495
2496 Py_DECREF(self->last);
2497 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002498 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002499
2500 if (self->start_event_obj) {
2501 PyObject* res;
2502 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002503 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002504 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002505 PyList_Append(self->events, res);
2506 Py_DECREF(res);
2507 } else
2508 PyErr_Clear(); /* FIXME: propagate error */
2509 }
2510
2511 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002512
2513 error:
2514 Py_DECREF(node);
2515 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002516}
2517
2518LOCAL(PyObject*)
2519treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2520{
2521 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002522 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002523 /* ignore calls to data before the first call to start */
2524 Py_RETURN_NONE;
2525 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002526 /* store the first item as is */
2527 Py_INCREF(data); self->data = data;
2528 } else {
2529 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002530 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2531 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002532 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002533 /* expat often generates single character data sections; handle
2534 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002535 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2536 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002537 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002538 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002539 } else if (PyList_CheckExact(self->data)) {
2540 if (PyList_Append(self->data, data) < 0)
2541 return NULL;
2542 } else {
2543 PyObject* list = PyList_New(2);
2544 if (!list)
2545 return NULL;
2546 PyList_SET_ITEM(list, 0, self->data);
2547 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2548 self->data = list;
2549 }
2550 }
2551
2552 Py_RETURN_NONE;
2553}
2554
2555LOCAL(PyObject*)
2556treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2557{
2558 PyObject* item;
2559
2560 if (self->data) {
2561 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002562 if (treebuilder_set_element_text(self->last, self->data))
2563 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002564 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002565 if (treebuilder_set_element_tail(self->last, self->data))
2566 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002567 }
2568 self->data = NULL;
2569 }
2570
2571 if (self->index == 0) {
2572 PyErr_SetString(
2573 PyExc_IndexError,
2574 "pop from empty stack"
2575 );
2576 return NULL;
2577 }
2578
2579 self->index--;
2580
2581 item = PyList_GET_ITEM(self->stack, self->index);
2582 Py_INCREF(item);
2583
2584 Py_DECREF(self->last);
2585
Antoine Pitrouee329312012-10-04 19:53:29 +02002586 self->last = self->this;
2587 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002588
2589 if (self->end_event_obj) {
2590 PyObject* res;
2591 PyObject* action = self->end_event_obj;
2592 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002593 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002594 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002595 PyList_Append(self->events, res);
2596 Py_DECREF(res);
2597 } else
2598 PyErr_Clear(); /* FIXME: propagate error */
2599 }
2600
2601 Py_INCREF(self->last);
2602 return (PyObject*) self->last;
2603}
2604
2605LOCAL(void)
2606treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002607 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002608{
2609 PyObject* res;
2610 PyObject* action;
2611 PyObject* parcel;
2612
2613 if (!self->events)
2614 return;
2615
2616 if (start) {
2617 if (!self->start_ns_event_obj)
2618 return;
2619 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002620 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002621 if (!parcel)
2622 return;
2623 Py_INCREF(action);
2624 } else {
2625 if (!self->end_ns_event_obj)
2626 return;
2627 action = self->end_ns_event_obj;
2628 Py_INCREF(action);
2629 parcel = Py_None;
2630 Py_INCREF(parcel);
2631 }
2632
2633 res = PyTuple_New(2);
2634
2635 if (res) {
2636 PyTuple_SET_ITEM(res, 0, action);
2637 PyTuple_SET_ITEM(res, 1, parcel);
2638 PyList_Append(self->events, res);
2639 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002640 }
2641 else {
2642 Py_DECREF(action);
2643 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002644 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002645 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002646}
2647
2648/* -------------------------------------------------------------------- */
2649/* methods (in alphabetical order) */
2650
2651static PyObject*
2652treebuilder_data(TreeBuilderObject* self, PyObject* args)
2653{
2654 PyObject* data;
2655 if (!PyArg_ParseTuple(args, "O:data", &data))
2656 return NULL;
2657
2658 return treebuilder_handle_data(self, data);
2659}
2660
2661static PyObject*
2662treebuilder_end(TreeBuilderObject* self, PyObject* args)
2663{
2664 PyObject* tag;
2665 if (!PyArg_ParseTuple(args, "O:end", &tag))
2666 return NULL;
2667
2668 return treebuilder_handle_end(self, tag);
2669}
2670
2671LOCAL(PyObject*)
2672treebuilder_done(TreeBuilderObject* self)
2673{
2674 PyObject* res;
2675
2676 /* FIXME: check stack size? */
2677
2678 if (self->root)
2679 res = self->root;
2680 else
2681 res = Py_None;
2682
2683 Py_INCREF(res);
2684 return res;
2685}
2686
2687static PyObject*
2688treebuilder_close(TreeBuilderObject* self, PyObject* args)
2689{
2690 if (!PyArg_ParseTuple(args, ":close"))
2691 return NULL;
2692
2693 return treebuilder_done(self);
2694}
2695
2696static PyObject*
2697treebuilder_start(TreeBuilderObject* self, PyObject* args)
2698{
2699 PyObject* tag;
2700 PyObject* attrib = Py_None;
2701 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2702 return NULL;
2703
2704 return treebuilder_handle_start(self, tag, attrib);
2705}
2706
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707static PyMethodDef treebuilder_methods[] = {
2708 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2709 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2710 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2712 {NULL, NULL}
2713};
2714
Neal Norwitz227b5332006-03-22 09:28:35 +00002715static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002716 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002717 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002719 (destructor)treebuilder_dealloc, /* tp_dealloc */
2720 0, /* tp_print */
2721 0, /* tp_getattr */
2722 0, /* tp_setattr */
2723 0, /* tp_reserved */
2724 0, /* tp_repr */
2725 0, /* tp_as_number */
2726 0, /* tp_as_sequence */
2727 0, /* tp_as_mapping */
2728 0, /* tp_hash */
2729 0, /* tp_call */
2730 0, /* tp_str */
2731 0, /* tp_getattro */
2732 0, /* tp_setattro */
2733 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002734 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2735 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002736 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002737 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2738 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002739 0, /* tp_richcompare */
2740 0, /* tp_weaklistoffset */
2741 0, /* tp_iter */
2742 0, /* tp_iternext */
2743 treebuilder_methods, /* tp_methods */
2744 0, /* tp_members */
2745 0, /* tp_getset */
2746 0, /* tp_base */
2747 0, /* tp_dict */
2748 0, /* tp_descr_get */
2749 0, /* tp_descr_set */
2750 0, /* tp_dictoffset */
2751 (initproc)treebuilder_init, /* tp_init */
2752 PyType_GenericAlloc, /* tp_alloc */
2753 treebuilder_new, /* tp_new */
2754 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002755};
2756
2757/* ==================================================================== */
2758/* the expat interface */
2759
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002760#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002761#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002762
2763/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2764 * cached globally without being in per-module state.
2765 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002766static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002767#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002768
Eli Bendersky52467b12012-06-01 07:13:08 +03002769static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2770 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2771
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002772typedef struct {
2773 PyObject_HEAD
2774
2775 XML_Parser parser;
2776
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002777 PyObject *target;
2778 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002779
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002780 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002781
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002782 PyObject *handle_start;
2783 PyObject *handle_data;
2784 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002785
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002786 PyObject *handle_comment;
2787 PyObject *handle_pi;
2788 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002789
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002790 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002791
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002792} XMLParserObject;
2793
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03002794static PyObject* xmlparser_doctype(XMLParserObject* self, PyObject* args);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002795
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002796/* helpers */
2797
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002798LOCAL(PyObject*)
2799makeuniversal(XMLParserObject* self, const char* string)
2800{
2801 /* convert a UTF-8 tag/attribute name from the expat parser
2802 to a universal name string */
2803
Antoine Pitrouc1948842012-10-01 23:40:37 +02002804 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002805 PyObject* key;
2806 PyObject* value;
2807
2808 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002809 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002810 if (!key)
2811 return NULL;
2812
2813 value = PyDict_GetItem(self->names, key);
2814
2815 if (value) {
2816 Py_INCREF(value);
2817 } else {
2818 /* new name. convert to universal name, and decode as
2819 necessary */
2820
2821 PyObject* tag;
2822 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002823 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002824
2825 /* look for namespace separator */
2826 for (i = 0; i < size; i++)
2827 if (string[i] == '}')
2828 break;
2829 if (i != size) {
2830 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002831 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002832 if (tag == NULL) {
2833 Py_DECREF(key);
2834 return NULL;
2835 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002836 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002837 p[0] = '{';
2838 memcpy(p+1, string, size);
2839 size++;
2840 } else {
2841 /* plain name; use key as tag */
2842 Py_INCREF(key);
2843 tag = key;
2844 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002845
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002846 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002847 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002848 value = PyUnicode_DecodeUTF8(p, size, "strict");
2849 Py_DECREF(tag);
2850 if (!value) {
2851 Py_DECREF(key);
2852 return NULL;
2853 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002854
2855 /* add to names dictionary */
2856 if (PyDict_SetItem(self->names, key, value) < 0) {
2857 Py_DECREF(key);
2858 Py_DECREF(value);
2859 return NULL;
2860 }
2861 }
2862
2863 Py_DECREF(key);
2864 return value;
2865}
2866
Eli Bendersky5b77d812012-03-16 08:20:05 +02002867/* Set the ParseError exception with the given parameters.
2868 * If message is not NULL, it's used as the error string. Otherwise, the
2869 * message string is the default for the given error_code.
2870*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002871static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002872expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002873{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002874 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002875 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002876
Victor Stinner499dfcf2011-03-21 13:26:24 +01002877 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002878 message ? message : EXPAT(ErrorString)(error_code),
2879 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002880 if (errmsg == NULL)
2881 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002882
Eli Bendersky532d03e2013-08-10 08:00:39 -07002883 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002884 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002885 if (!error)
2886 return;
2887
Eli Bendersky5b77d812012-03-16 08:20:05 +02002888 /* Add code and position attributes */
2889 code = PyLong_FromLong((long)error_code);
2890 if (!code) {
2891 Py_DECREF(error);
2892 return;
2893 }
2894 if (PyObject_SetAttrString(error, "code", code) == -1) {
2895 Py_DECREF(error);
2896 Py_DECREF(code);
2897 return;
2898 }
2899 Py_DECREF(code);
2900
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002901 position = Py_BuildValue("(ii)", line, column);
2902 if (!position) {
2903 Py_DECREF(error);
2904 return;
2905 }
2906 if (PyObject_SetAttrString(error, "position", position) == -1) {
2907 Py_DECREF(error);
2908 Py_DECREF(position);
2909 return;
2910 }
2911 Py_DECREF(position);
2912
Eli Bendersky532d03e2013-08-10 08:00:39 -07002913 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002914 Py_DECREF(error);
2915}
2916
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002917/* -------------------------------------------------------------------- */
2918/* handlers */
2919
2920static void
2921expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2922 int data_len)
2923{
2924 PyObject* key;
2925 PyObject* value;
2926 PyObject* res;
2927
2928 if (data_len < 2 || data_in[0] != '&')
2929 return;
2930
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002931 if (PyErr_Occurred())
2932 return;
2933
Neal Norwitz0269b912007-08-08 06:56:02 +00002934 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002935 if (!key)
2936 return;
2937
2938 value = PyDict_GetItem(self->entity, key);
2939
2940 if (value) {
2941 if (TreeBuilder_CheckExact(self->target))
2942 res = treebuilder_handle_data(
2943 (TreeBuilderObject*) self->target, value
2944 );
2945 else if (self->handle_data)
2946 res = PyObject_CallFunction(self->handle_data, "O", value);
2947 else
2948 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002949 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002950 } else if (!PyErr_Occurred()) {
2951 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002952 char message[128] = "undefined entity ";
2953 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002954 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002955 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002956 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002957 EXPAT(GetErrorColumnNumber)(self->parser),
2958 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002959 );
2960 }
2961
2962 Py_DECREF(key);
2963}
2964
2965static void
2966expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2967 const XML_Char **attrib_in)
2968{
2969 PyObject* res;
2970 PyObject* tag;
2971 PyObject* attrib;
2972 int ok;
2973
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002974 if (PyErr_Occurred())
2975 return;
2976
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002977 /* tag name */
2978 tag = makeuniversal(self, tag_in);
2979 if (!tag)
2980 return; /* parser will look for errors */
2981
2982 /* attributes */
2983 if (attrib_in[0]) {
2984 attrib = PyDict_New();
2985 if (!attrib)
2986 return;
2987 while (attrib_in[0] && attrib_in[1]) {
2988 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002989 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002990 if (!key || !value) {
2991 Py_XDECREF(value);
2992 Py_XDECREF(key);
2993 Py_DECREF(attrib);
2994 return;
2995 }
2996 ok = PyDict_SetItem(attrib, key, value);
2997 Py_DECREF(value);
2998 Py_DECREF(key);
2999 if (ok < 0) {
3000 Py_DECREF(attrib);
3001 return;
3002 }
3003 attrib_in += 2;
3004 }
3005 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003006 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03003007 attrib = PyDict_New();
3008 if (!attrib)
3009 return;
3010 }
3011
3012 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003013 /* shortcut */
3014 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3015 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003016 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003017 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003018 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003019 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003020 res = NULL;
3021
3022 Py_DECREF(tag);
3023 Py_DECREF(attrib);
3024
3025 Py_XDECREF(res);
3026}
3027
3028static void
3029expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3030 int data_len)
3031{
3032 PyObject* data;
3033 PyObject* res;
3034
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003035 if (PyErr_Occurred())
3036 return;
3037
Neal Norwitz0269b912007-08-08 06:56:02 +00003038 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003039 if (!data)
3040 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003041
3042 if (TreeBuilder_CheckExact(self->target))
3043 /* shortcut */
3044 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3045 else if (self->handle_data)
3046 res = PyObject_CallFunction(self->handle_data, "O", data);
3047 else
3048 res = NULL;
3049
3050 Py_DECREF(data);
3051
3052 Py_XDECREF(res);
3053}
3054
3055static void
3056expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3057{
3058 PyObject* tag;
3059 PyObject* res = NULL;
3060
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003061 if (PyErr_Occurred())
3062 return;
3063
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003064 if (TreeBuilder_CheckExact(self->target))
3065 /* shortcut */
3066 /* the standard tree builder doesn't look at the end tag */
3067 res = treebuilder_handle_end(
3068 (TreeBuilderObject*) self->target, Py_None
3069 );
3070 else if (self->handle_end) {
3071 tag = makeuniversal(self, tag_in);
3072 if (tag) {
3073 res = PyObject_CallFunction(self->handle_end, "O", tag);
3074 Py_DECREF(tag);
3075 }
3076 }
3077
3078 Py_XDECREF(res);
3079}
3080
3081static void
3082expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3083 const XML_Char *uri)
3084{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003085 PyObject* sprefix = NULL;
3086 PyObject* suri = NULL;
3087
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003088 if (PyErr_Occurred())
3089 return;
3090
Eli Bendersky5dd40e52013-11-28 06:31:58 -08003091 if (uri)
Eli Bendersky4b795182013-11-28 06:33:21 -08003092 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
Eli Bendersky5dd40e52013-11-28 06:31:58 -08003093 else
Eli Bendersky4b795182013-11-28 06:33:21 -08003094 suri = PyUnicode_FromString("");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003095 if (!suri)
3096 return;
3097
3098 if (prefix)
3099 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3100 else
3101 sprefix = PyUnicode_FromString("");
3102 if (!sprefix) {
3103 Py_DECREF(suri);
3104 return;
3105 }
3106
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003107 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003108 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003109 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003110
3111 Py_DECREF(sprefix);
3112 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003113}
3114
3115static void
3116expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3117{
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003118 if (PyErr_Occurred())
3119 return;
3120
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003121 treebuilder_handle_namespace(
3122 (TreeBuilderObject*) self->target, 0, NULL, NULL
3123 );
3124}
3125
3126static void
3127expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3128{
3129 PyObject* comment;
3130 PyObject* res;
3131
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003132 if (PyErr_Occurred())
3133 return;
3134
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003135 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003136 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003137 if (comment) {
3138 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3139 Py_XDECREF(res);
3140 Py_DECREF(comment);
3141 }
3142 }
3143}
3144
Eli Bendersky45839902013-01-13 05:14:47 -08003145static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003146expat_start_doctype_handler(XMLParserObject *self,
3147 const XML_Char *doctype_name,
3148 const XML_Char *sysid,
3149 const XML_Char *pubid,
3150 int has_internal_subset)
3151{
3152 PyObject *self_pyobj = (PyObject *)self;
3153 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3154 PyObject *parser_doctype = NULL;
3155 PyObject *res = NULL;
3156
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003157 if (PyErr_Occurred())
3158 return;
3159
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003160 doctype_name_obj = makeuniversal(self, doctype_name);
3161 if (!doctype_name_obj)
3162 return;
3163
3164 if (sysid) {
3165 sysid_obj = makeuniversal(self, sysid);
3166 if (!sysid_obj) {
3167 Py_DECREF(doctype_name_obj);
3168 return;
3169 }
3170 } else {
3171 Py_INCREF(Py_None);
3172 sysid_obj = Py_None;
3173 }
3174
3175 if (pubid) {
3176 pubid_obj = makeuniversal(self, pubid);
3177 if (!pubid_obj) {
3178 Py_DECREF(doctype_name_obj);
3179 Py_DECREF(sysid_obj);
3180 return;
3181 }
3182 } else {
3183 Py_INCREF(Py_None);
3184 pubid_obj = Py_None;
3185 }
3186
3187 /* If the target has a handler for doctype, call it. */
3188 if (self->handle_doctype) {
3189 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3190 doctype_name_obj, pubid_obj, sysid_obj);
3191 Py_CLEAR(res);
3192 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003193 else {
3194 /* Now see if the parser itself has a doctype method. If yes and it's
3195 * a custom method, call it but warn about deprecation. If it's only
3196 * the vanilla XMLParser method, do nothing.
3197 */
3198 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3199 if (parser_doctype &&
3200 !(PyCFunction_Check(parser_doctype) &&
3201 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3202 PyCFunction_GET_FUNCTION(parser_doctype) ==
3203 (PyCFunction) xmlparser_doctype)) {
3204 res = xmlparser_doctype(self, NULL);
3205 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003206 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003207 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003208 res = PyObject_CallFunction(parser_doctype, "OOO",
3209 doctype_name_obj, pubid_obj, sysid_obj);
3210 Py_CLEAR(res);
3211 }
3212 }
3213
3214clear:
3215 Py_XDECREF(parser_doctype);
3216 Py_DECREF(doctype_name_obj);
3217 Py_DECREF(pubid_obj);
3218 Py_DECREF(sysid_obj);
3219}
3220
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003221static void
3222expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3223 const XML_Char* data_in)
3224{
3225 PyObject* target;
3226 PyObject* data;
3227 PyObject* res;
3228
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003229 if (PyErr_Occurred())
3230 return;
3231
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003232 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003233 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3234 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003235 if (target && data) {
3236 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3237 Py_XDECREF(res);
3238 Py_DECREF(data);
3239 Py_DECREF(target);
3240 } else {
3241 Py_XDECREF(data);
3242 Py_XDECREF(target);
3243 }
3244 }
3245}
3246
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003247/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003248
Eli Bendersky52467b12012-06-01 07:13:08 +03003249static PyObject *
3250xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003251{
Eli Bendersky52467b12012-06-01 07:13:08 +03003252 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3253 if (self) {
3254 self->parser = NULL;
3255 self->target = self->entity = self->names = NULL;
3256 self->handle_start = self->handle_data = self->handle_end = NULL;
3257 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003258 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003259 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003260 return (PyObject *)self;
3261}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003262
Eli Bendersky52467b12012-06-01 07:13:08 +03003263static int
3264xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3265{
3266 XMLParserObject *self_xp = (XMLParserObject *)self;
3267 PyObject *target = NULL, *html = NULL;
3268 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003269 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003270
Eli Bendersky52467b12012-06-01 07:13:08 +03003271 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3272 &html, &target, &encoding)) {
3273 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003275
Eli Bendersky52467b12012-06-01 07:13:08 +03003276 self_xp->entity = PyDict_New();
3277 if (!self_xp->entity)
3278 return -1;
3279
3280 self_xp->names = PyDict_New();
3281 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003282 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003283 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284 }
3285
Eli Bendersky52467b12012-06-01 07:13:08 +03003286 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3287 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003288 Py_CLEAR(self_xp->entity);
3289 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003291 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003292 }
3293
Eli Bendersky52467b12012-06-01 07:13:08 +03003294 if (target) {
3295 Py_INCREF(target);
3296 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003297 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003298 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003299 Py_CLEAR(self_xp->entity);
3300 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003301 EXPAT(ParserFree)(self_xp->parser);
3302 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003303 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003304 }
3305 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003306
Eli Bendersky52467b12012-06-01 07:13:08 +03003307 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3308 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3309 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3310 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3311 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3312 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003313 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003314
3315 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003316
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003317 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003318 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003319 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003320 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003321 (XML_StartElementHandler) expat_start_handler,
3322 (XML_EndElementHandler) expat_end_handler
3323 );
3324 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003325 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003326 (XML_DefaultHandler) expat_default_handler
3327 );
3328 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003329 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003330 (XML_CharacterDataHandler) expat_data_handler
3331 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003332 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003333 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003334 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003335 (XML_CommentHandler) expat_comment_handler
3336 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003337 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003338 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003339 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003340 (XML_ProcessingInstructionHandler) expat_pi_handler
3341 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003342 EXPAT(SetStartDoctypeDeclHandler)(
3343 self_xp->parser,
3344 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3345 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003347 self_xp->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003348 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003349 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003350
Eli Bendersky52467b12012-06-01 07:13:08 +03003351 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003352}
3353
Eli Bendersky52467b12012-06-01 07:13:08 +03003354static int
3355xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3356{
3357 Py_VISIT(self->handle_close);
3358 Py_VISIT(self->handle_pi);
3359 Py_VISIT(self->handle_comment);
3360 Py_VISIT(self->handle_end);
3361 Py_VISIT(self->handle_data);
3362 Py_VISIT(self->handle_start);
3363
3364 Py_VISIT(self->target);
3365 Py_VISIT(self->entity);
3366 Py_VISIT(self->names);
3367
3368 return 0;
3369}
3370
3371static int
3372xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003373{
3374 EXPAT(ParserFree)(self->parser);
3375
Antoine Pitrouc1948842012-10-01 23:40:37 +02003376 Py_CLEAR(self->handle_close);
3377 Py_CLEAR(self->handle_pi);
3378 Py_CLEAR(self->handle_comment);
3379 Py_CLEAR(self->handle_end);
3380 Py_CLEAR(self->handle_data);
3381 Py_CLEAR(self->handle_start);
3382 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003383
Antoine Pitrouc1948842012-10-01 23:40:37 +02003384 Py_CLEAR(self->target);
3385 Py_CLEAR(self->entity);
3386 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003387
Eli Bendersky52467b12012-06-01 07:13:08 +03003388 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003389}
3390
Eli Bendersky52467b12012-06-01 07:13:08 +03003391static void
3392xmlparser_dealloc(XMLParserObject* self)
3393{
3394 PyObject_GC_UnTrack(self);
3395 xmlparser_gc_clear(self);
3396 Py_TYPE(self)->tp_free((PyObject *)self);
3397}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003398
3399LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003400expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003401{
3402 int ok;
3403
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003404 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003405 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3406
3407 if (PyErr_Occurred())
3408 return NULL;
3409
3410 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003411 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003412 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003413 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003414 EXPAT(GetErrorColumnNumber)(self->parser),
3415 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003416 );
3417 return NULL;
3418 }
3419
3420 Py_RETURN_NONE;
3421}
3422
3423static PyObject*
3424xmlparser_close(XMLParserObject* self, PyObject* args)
3425{
3426 /* end feeding data to parser */
3427
3428 PyObject* res;
3429 if (!PyArg_ParseTuple(args, ":close"))
3430 return NULL;
3431
3432 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003433 if (!res)
3434 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003435
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003436 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003437 Py_DECREF(res);
3438 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003439 }
3440 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003441 Py_DECREF(res);
3442 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003443 }
3444 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003445 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003446 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003447}
3448
3449static PyObject*
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003450xmlparser_feed(XMLParserObject* self, PyObject* arg)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003451{
3452 /* feed data to parser */
3453
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003454 if (PyUnicode_Check(arg)) {
3455 Py_ssize_t data_len;
3456 const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len);
3457 if (data == NULL)
3458 return NULL;
3459 if (data_len > INT_MAX) {
3460 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3461 return NULL;
3462 }
3463 /* Explicitly set UTF-8 encoding. Return code ignored. */
3464 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3465 return expat_parse(self, data, (int)data_len, 0);
3466 }
3467 else {
3468 Py_buffer view;
3469 PyObject *res;
3470 if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0)
3471 return NULL;
3472 if (view.len > INT_MAX) {
3473 PyBuffer_Release(&view);
3474 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3475 return NULL;
3476 }
3477 res = expat_parse(self, view.buf, (int)view.len, 0);
3478 PyBuffer_Release(&view);
3479 return res;
3480 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003481}
3482
3483static PyObject*
Eli Benderskya3699232013-05-19 18:47:23 -07003484xmlparser_parse_whole(XMLParserObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003485{
Eli Benderskya3699232013-05-19 18:47:23 -07003486 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003487 PyObject* reader;
3488 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003489 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003490 PyObject* res;
3491
3492 PyObject* fileobj;
3493 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3494 return NULL;
3495
3496 reader = PyObject_GetAttrString(fileobj, "read");
3497 if (!reader)
3498 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003499
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003500 /* read from open file object */
3501 for (;;) {
3502
3503 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3504
3505 if (!buffer) {
3506 /* read failed (e.g. due to KeyboardInterrupt) */
3507 Py_DECREF(reader);
3508 return NULL;
3509 }
3510
Eli Benderskyf996e772012-03-16 05:53:30 +02003511 if (PyUnicode_CheckExact(buffer)) {
3512 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003513 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003514 Py_DECREF(buffer);
3515 break;
3516 }
3517 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003518 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003519 if (!temp) {
3520 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003521 Py_DECREF(reader);
3522 return NULL;
3523 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003524 buffer = temp;
3525 }
3526 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003527 Py_DECREF(buffer);
3528 break;
3529 }
3530
3531 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003532 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003533 );
3534
3535 Py_DECREF(buffer);
3536
3537 if (!res) {
3538 Py_DECREF(reader);
3539 return NULL;
3540 }
3541 Py_DECREF(res);
3542
3543 }
3544
3545 Py_DECREF(reader);
3546
3547 res = expat_parse(self, "", 0, 1);
3548
3549 if (res && TreeBuilder_CheckExact(self->target)) {
3550 Py_DECREF(res);
3551 return treebuilder_done((TreeBuilderObject*) self->target);
3552 }
3553
3554 return res;
3555}
3556
3557static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003558xmlparser_doctype(XMLParserObject *self, PyObject *args)
3559{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003560 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3561 "This method of XMLParser is deprecated. Define"
3562 " doctype() method on the TreeBuilder target.",
3563 1) < 0) {
3564 return NULL;
3565 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003566 Py_RETURN_NONE;
3567}
3568
3569static PyObject*
3570xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003571{
3572 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003573 Py_ssize_t i, seqlen;
3574 TreeBuilderObject *target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003575
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003576 PyObject *events_queue;
3577 PyObject *events_to_report = Py_None;
3578 PyObject *events_seq;
3579 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events_queue,
3580 &events_to_report))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003581 return NULL;
3582
3583 if (!TreeBuilder_CheckExact(self->target)) {
3584 PyErr_SetString(
3585 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003586 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003587 "targets"
3588 );
3589 return NULL;
3590 }
3591
3592 target = (TreeBuilderObject*) self->target;
3593
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003594 Py_INCREF(events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003595 Py_XDECREF(target->events);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003596 target->events = events_queue;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003597
3598 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003599 Py_CLEAR(target->start_event_obj);
3600 Py_CLEAR(target->end_event_obj);
3601 Py_CLEAR(target->start_ns_event_obj);
3602 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003603
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003604 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003605 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003606 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003607 Py_RETURN_NONE;
3608 }
3609
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003610 if (!(events_seq = PySequence_Fast(events_to_report,
3611 "events must be a sequence"))) {
3612 return NULL;
3613 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003614
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003615 seqlen = PySequence_Size(events_seq);
3616 for (i = 0; i < seqlen; ++i) {
3617 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3618 char *event_name = NULL;
3619 if (PyUnicode_Check(event_name_obj)) {
3620 event_name = _PyUnicode_AsString(event_name_obj);
3621 } else if (PyBytes_Check(event_name_obj)) {
3622 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003623 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003624
3625 if (event_name == NULL) {
3626 Py_DECREF(events_seq);
3627 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3628 return NULL;
3629 } else if (strcmp(event_name, "start") == 0) {
3630 Py_INCREF(event_name_obj);
3631 target->start_event_obj = event_name_obj;
3632 } else if (strcmp(event_name, "end") == 0) {
3633 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003634 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003635 target->end_event_obj = event_name_obj;
3636 } else if (strcmp(event_name, "start-ns") == 0) {
3637 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003638 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003639 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003640 EXPAT(SetNamespaceDeclHandler)(
3641 self->parser,
3642 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3643 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3644 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003645 } else if (strcmp(event_name, "end-ns") == 0) {
3646 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003647 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003648 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003649 EXPAT(SetNamespaceDeclHandler)(
3650 self->parser,
3651 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3652 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3653 );
3654 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003655 Py_DECREF(events_seq);
3656 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003657 return NULL;
3658 }
3659 }
3660
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003661 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003662 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003663}
3664
3665static PyMethodDef xmlparser_methods[] = {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003666 {"feed", (PyCFunction) xmlparser_feed, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003667 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
Eli Benderskya3699232013-05-19 18:47:23 -07003668 {"_parse_whole", (PyCFunction) xmlparser_parse_whole, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003669 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003670 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003671 {NULL, NULL}
3672};
3673
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003674static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003675xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003676{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003677 if (PyUnicode_Check(nameobj)) {
3678 PyObject* res;
3679 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3680 res = self->entity;
3681 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3682 res = self->target;
3683 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3684 return PyUnicode_FromFormat(
3685 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003686 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003687 }
3688 else
3689 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003690
Alexander Belopolskye239d232010-12-08 23:31:48 +00003691 Py_INCREF(res);
3692 return res;
3693 }
3694 generic:
3695 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003696}
3697
Neal Norwitz227b5332006-03-22 09:28:35 +00003698static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003699 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003700 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003701 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003702 (destructor)xmlparser_dealloc, /* tp_dealloc */
3703 0, /* tp_print */
3704 0, /* tp_getattr */
3705 0, /* tp_setattr */
3706 0, /* tp_reserved */
3707 0, /* tp_repr */
3708 0, /* tp_as_number */
3709 0, /* tp_as_sequence */
3710 0, /* tp_as_mapping */
3711 0, /* tp_hash */
3712 0, /* tp_call */
3713 0, /* tp_str */
3714 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3715 0, /* tp_setattro */
3716 0, /* tp_as_buffer */
3717 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3718 /* tp_flags */
3719 0, /* tp_doc */
3720 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3721 (inquiry)xmlparser_gc_clear, /* tp_clear */
3722 0, /* tp_richcompare */
3723 0, /* tp_weaklistoffset */
3724 0, /* tp_iter */
3725 0, /* tp_iternext */
3726 xmlparser_methods, /* tp_methods */
3727 0, /* tp_members */
3728 0, /* tp_getset */
3729 0, /* tp_base */
3730 0, /* tp_dict */
3731 0, /* tp_descr_get */
3732 0, /* tp_descr_set */
3733 0, /* tp_dictoffset */
3734 (initproc)xmlparser_init, /* tp_init */
3735 PyType_GenericAlloc, /* tp_alloc */
3736 xmlparser_new, /* tp_new */
3737 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003738};
3739
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003740/* ==================================================================== */
3741/* python module interface */
3742
3743static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003744 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003745 {NULL, NULL}
3746};
3747
Martin v. Löwis1a214512008-06-11 05:26:20 +00003748
Eli Bendersky532d03e2013-08-10 08:00:39 -07003749static struct PyModuleDef elementtreemodule = {
3750 PyModuleDef_HEAD_INIT,
3751 "_elementtree",
3752 NULL,
3753 sizeof(elementtreestate),
3754 _functions,
3755 NULL,
3756 elementtree_traverse,
3757 elementtree_clear,
3758 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003759};
3760
Neal Norwitzf6657e62006-12-28 04:47:50 +00003761PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003762PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003763{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003764 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003765 elementtreestate *st;
3766
3767 m = PyState_FindModule(&elementtreemodule);
3768 if (m) {
3769 Py_INCREF(m);
3770 return m;
3771 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003772
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003773 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003774 if (PyType_Ready(&ElementIter_Type) < 0)
3775 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003776 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003777 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003778 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003779 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003780 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003781 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003782
Eli Bendersky532d03e2013-08-10 08:00:39 -07003783 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003784 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003785 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003786 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003787
Eli Bendersky828efde2012-04-05 05:40:58 +03003788 if (!(temp = PyImport_ImportModule("copy")))
3789 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003790 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003791 Py_XDECREF(temp);
3792
Eli Bendersky532d03e2013-08-10 08:00:39 -07003793 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003794 return NULL;
3795
Eli Bendersky20d41742012-06-01 09:48:37 +03003796 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003797 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3798 if (expat_capi) {
3799 /* check that it's usable */
3800 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3801 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3802 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3803 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003804 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003805 PyErr_SetString(PyExc_ImportError,
3806 "pyexpat version is incompatible");
3807 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003808 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003809 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003810 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003811 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003812
Eli Bendersky532d03e2013-08-10 08:00:39 -07003813 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003814 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003815 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003816 Py_INCREF(st->parseerror_obj);
3817 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003818
Eli Bendersky092af1f2012-03-04 07:14:03 +02003819 Py_INCREF((PyObject *)&Element_Type);
3820 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3821
Eli Bendersky58d548d2012-05-29 15:45:16 +03003822 Py_INCREF((PyObject *)&TreeBuilder_Type);
3823 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3824
Eli Bendersky52467b12012-06-01 07:13:08 +03003825 Py_INCREF((PyObject *)&XMLParser_Type);
3826 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003827
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003828 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003829}