blob: b3b69767086c2797cda702c7bec9688860c445be [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
14#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030015#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016
/* -------------------------------------------------------------------- */
/* configuration */

/* Number of child slots embedded directly in an element's extra block;
   up to this many children need no separate buffer allocation. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */

/* -------------------------------------------------------------------- */
35
/* Optional allocation tracing.  With the first branch enabled, ALLOC and
 * RELEASE maintain a running byte counter and print it together with the
 * given comment; in the default branch both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
46
/* compiler tweaks: LOCAL(type) declares a file-local helper returning
   'type'.  Under MSVC it additionally requests inlining and the fastcall
   calling convention; elsewhere it is plain 'static'. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
53
/* Macros that keep a one-bit 'join' flag in the lowest bit of a string
   object pointer.  Every use of an element's text or tail as an object
   pointer must go through JOIN_OBJ; see the comments in the ElementObject
   definition for more info.

   JOIN_GET(p)        -> the flag (0 or 1) stored in p
   JOIN_SET(p, flag)  -> p with its old flag stripped and 'flag' or'ed in
   JOIN_OBJ(p)        -> the plain PyObject* with the flag bit masked off */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061
Eli Benderskydd3661e2013-09-13 06:24:25 -070062/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
63 * reference since this function sets it to NULL.
64*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020065static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070066{
67 if (*p) {
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = NULL;
70 Py_DECREF(tmp);
71 }
72}
73
Ronald Oussoren138d0802013-07-19 11:11:25 +020074/* Types defined by this extension */
75static PyTypeObject Element_Type;
76static PyTypeObject ElementIter_Type;
77static PyTypeObject TreeBuilder_Type;
78static PyTypeObject XMLParser_Type;
79
80
Eli Bendersky532d03e2013-08-10 08:00:39 -070081/* Per-module state; PEP 3121 */
82typedef struct {
83 PyObject *parseerror_obj;
84 PyObject *deepcopy_obj;
85 PyObject *elementpath_obj;
86} elementtreestate;
87
88static struct PyModuleDef elementtreemodule;
89
90/* Given a module object (assumed to be _elementtree), get its per-module
91 * state.
92 */
93#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
94
95/* Find the module instance imported in the currently running sub-interpreter
96 * and get its state.
97 */
98#define ET_STATE_GLOBAL \
99 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
100
101static int
102elementtree_clear(PyObject *m)
103{
104 elementtreestate *st = ET_STATE(m);
105 Py_CLEAR(st->parseerror_obj);
106 Py_CLEAR(st->deepcopy_obj);
107 Py_CLEAR(st->elementpath_obj);
108 return 0;
109}
110
111static int
112elementtree_traverse(PyObject *m, visitproc visit, void *arg)
113{
114 elementtreestate *st = ET_STATE(m);
115 Py_VISIT(st->parseerror_obj);
116 Py_VISIT(st->deepcopy_obj);
117 Py_VISIT(st->elementpath_obj);
118 return 0;
119}
120
121static void
122elementtree_free(void *m)
123{
124 elementtree_clear((PyObject *)m);
125}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126
127/* helpers */
128
129LOCAL(PyObject*)
130deepcopy(PyObject* object, PyObject* memo)
131{
132 /* do a deep copy of the given object */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000133 PyObject* args;
134 PyObject* result;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700135 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136
Eli Bendersky532d03e2013-08-10 08:00:39 -0700137 if (!st->deepcopy_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138 PyErr_SetString(
139 PyExc_RuntimeError,
140 "deepcopy helper not found"
141 );
142 return NULL;
143 }
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000146 if (!args)
147 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700148 result = PyObject_CallObject(st->deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
153LOCAL(PyObject*)
154list_join(PyObject* list)
155{
156 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000158 PyObject* result;
159
Antoine Pitrouc1948842012-10-01 23:40:37 +0200160 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000161 if (!joiner)
162 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200163 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200165 if (result)
166 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000167 return result;
168}
169
Eli Bendersky48d358b2012-05-30 17:57:50 +0300170/* Is the given object an empty dictionary?
171*/
172static int
173is_empty_dict(PyObject *obj)
174{
175 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
176}
177
178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200180/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181
182typedef struct {
183
184 /* attributes (a dictionary object), or None if no attributes */
185 PyObject* attrib;
186
187 /* child elements */
188 int length; /* actual number of items */
189 int allocated; /* allocated items */
190
191 /* this either points to _children or to a malloced buffer */
192 PyObject* *children;
193
194 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100195
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000196} ElementObjectExtra;
197
198typedef struct {
199 PyObject_HEAD
200
201 /* element tag (a string). */
202 PyObject* tag;
203
204 /* text before first child. note that this is a tagged pointer;
205 use JOIN_OBJ to get the object pointer. the join flag is used
206 to distinguish lists created by the tree builder from lists
207 assigned to the attribute by application code; the former
208 should be joined before being returned to the user, the latter
209 should be left intact. */
210 PyObject* text;
211
212 /* text after this element, in parent. note that this is a tagged
213 pointer; use JOIN_OBJ to get the object pointer. */
214 PyObject* tail;
215
216 ElementObjectExtra* extra;
217
Eli Benderskyebf37a22012-04-03 22:02:37 +0300218 PyObject *weakreflist; /* For tp_weaklistoffset */
219
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000220} ElementObject;
221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222
/* True only when op's type is exactly Element_Type (subclasses excluded). */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
225/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200226/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227
228LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
231 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200232 if (!self->extra) {
233 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000234 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200235 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000236
237 if (!attrib)
238 attrib = Py_None;
239
240 Py_INCREF(attrib);
241 self->extra->attrib = attrib;
242
243 self->extra->length = 0;
244 self->extra->allocated = STATIC_CHILDREN;
245 self->extra->children = self->extra->_children;
246
247 return 0;
248}
249
250LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200251dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000252{
Eli Bendersky08b85292012-04-04 15:55:07 +0300253 ElementObjectExtra *myextra;
254 int i;
255
Eli Benderskyebf37a22012-04-03 22:02:37 +0300256 if (!self->extra)
257 return;
258
259 /* Avoid DECREFs calling into this code again (cycles, etc.)
260 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300261 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300262 self->extra = NULL;
263
264 Py_DECREF(myextra->attrib);
265
Eli Benderskyebf37a22012-04-03 22:02:37 +0300266 for (i = 0; i < myextra->length; i++)
267 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268
Eli Benderskyebf37a22012-04-03 22:02:37 +0300269 if (myextra->children != myextra->_children)
270 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271
Eli Benderskyebf37a22012-04-03 22:02:37 +0300272 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273}
274
Eli Bendersky092af1f2012-03-04 07:14:03 +0200275/* Convenience internal function to create new Element objects with the given
276 * tag and attributes.
277*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000278LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200279create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280{
281 ElementObject* self;
282
Eli Bendersky0192ba32012-03-30 16:38:33 +0300283 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000284 if (self == NULL)
285 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 self->extra = NULL;
287
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 Py_INCREF(tag);
289 self->tag = tag;
290
291 Py_INCREF(Py_None);
292 self->text = Py_None;
293
294 Py_INCREF(Py_None);
295 self->tail = Py_None;
296
Eli Benderskyebf37a22012-04-03 22:02:37 +0300297 self->weakreflist = NULL;
298
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200299 ALLOC(sizeof(ElementObject), "create element");
300 PyObject_GC_Track(self);
301
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200302 if (attrib != Py_None && !is_empty_dict(attrib)) {
303 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200304 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200305 return NULL;
306 }
307 }
308
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000309 return (PyObject*) self;
310}
311
Eli Bendersky092af1f2012-03-04 07:14:03 +0200312static PyObject *
313element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
314{
315 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
316 if (e != NULL) {
317 Py_INCREF(Py_None);
318 e->tag = Py_None;
319
320 Py_INCREF(Py_None);
321 e->text = Py_None;
322
323 Py_INCREF(Py_None);
324 e->tail = Py_None;
325
326 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300327 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200328 }
329 return (PyObject *)e;
330}
331
Eli Bendersky737b1732012-05-29 06:02:56 +0300332/* Helper function for extracting the attrib dictionary from a keywords dict.
333 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800334 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300335 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700336 *
337 * Return a dictionary with the content of kwds merged into the content of
338 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300339 */
340static PyObject*
341get_attrib_from_keywords(PyObject *kwds)
342{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700343 PyObject *attrib_str = PyUnicode_FromString("attrib");
344 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300345
346 if (attrib) {
347 /* If attrib was found in kwds, copy its value and remove it from
348 * kwds
349 */
350 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700351 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300352 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
353 Py_TYPE(attrib)->tp_name);
354 return NULL;
355 }
356 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700357 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300358 } else {
359 attrib = PyDict_New();
360 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700361
362 Py_DECREF(attrib_str);
363
364 /* attrib can be NULL if PyDict_New failed */
365 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200366 if (PyDict_Update(attrib, kwds) < 0)
367 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300368 return attrib;
369}
370
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371static int
372element_init(PyObject *self, PyObject *args, PyObject *kwds)
373{
374 PyObject *tag;
375 PyObject *tmp;
376 PyObject *attrib = NULL;
377 ElementObject *self_elem;
378
379 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
380 return -1;
381
Eli Bendersky737b1732012-05-29 06:02:56 +0300382 if (attrib) {
383 /* attrib passed as positional arg */
384 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200385 if (!attrib)
386 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300387 if (kwds) {
388 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200389 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300390 return -1;
391 }
392 }
393 } else if (kwds) {
394 /* have keywords args */
395 attrib = get_attrib_from_keywords(kwds);
396 if (!attrib)
397 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200398 }
399
400 self_elem = (ElementObject *)self;
401
Antoine Pitrouc1948842012-10-01 23:40:37 +0200402 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 return -1;
406 }
407 }
408
Eli Bendersky48d358b2012-05-30 17:57:50 +0300409 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200410 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200411
412 /* Replace the objects already pointed to by tag, text and tail. */
413 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200414 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200415 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416 Py_DECREF(tmp);
417
418 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200419 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200420 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 Py_DECREF(JOIN_OBJ(tmp));
422
423 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(JOIN_OBJ(tmp));
427
428 return 0;
429}
430
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000431LOCAL(int)
432element_resize(ElementObject* self, int extra)
433{
434 int size;
435 PyObject* *children;
436
437 /* make sure self->children can hold the given number of extra
438 elements. set an exception and return -1 if allocation failed */
439
Victor Stinner5f0af232013-07-11 23:01:36 +0200440 if (!self->extra) {
441 if (create_extra(self, NULL) < 0)
442 return -1;
443 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444
445 size = self->extra->length + extra;
446
447 if (size > self->extra->allocated) {
448 /* use Python 2.4's list growth strategy */
449 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000450 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100451 * which needs at least 4 bytes.
452 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000453 * be safe.
454 */
455 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000457 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100458 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000459 * false alarm always assume at least one child to be safe.
460 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000461 children = PyObject_Realloc(self->extra->children,
462 size * sizeof(PyObject*));
463 if (!children)
464 goto nomemory;
465 } else {
466 children = PyObject_Malloc(size * sizeof(PyObject*));
467 if (!children)
468 goto nomemory;
469 /* copy existing children from static area to malloc buffer */
470 memcpy(children, self->extra->children,
471 self->extra->length * sizeof(PyObject*));
472 }
473 self->extra->children = children;
474 self->extra->allocated = size;
475 }
476
477 return 0;
478
479 nomemory:
480 PyErr_NoMemory();
481 return -1;
482}
483
484LOCAL(int)
485element_add_subelement(ElementObject* self, PyObject* element)
486{
487 /* add a child element to a parent */
488
489 if (element_resize(self, 1) < 0)
490 return -1;
491
492 Py_INCREF(element);
493 self->extra->children[self->extra->length] = element;
494
495 self->extra->length++;
496
497 return 0;
498}
499
500LOCAL(PyObject*)
501element_get_attrib(ElementObject* self)
502{
503 /* return borrowed reference to attrib dictionary */
504 /* note: this function assumes that the extra section exists */
505
506 PyObject* res = self->extra->attrib;
507
508 if (res == Py_None) {
509 /* create missing dictionary */
510 res = PyDict_New();
511 if (!res)
512 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200513 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000514 self->extra->attrib = res;
515 }
516
517 return res;
518}
519
520LOCAL(PyObject*)
521element_get_text(ElementObject* self)
522{
523 /* return borrowed reference to text attribute */
524
525 PyObject* res = self->text;
526
527 if (JOIN_GET(res)) {
528 res = JOIN_OBJ(res);
529 if (PyList_CheckExact(res)) {
530 res = list_join(res);
531 if (!res)
532 return NULL;
533 self->text = res;
534 }
535 }
536
537 return res;
538}
539
540LOCAL(PyObject*)
541element_get_tail(ElementObject* self)
542{
543 /* return borrowed reference to text attribute */
544
545 PyObject* res = self->tail;
546
547 if (JOIN_GET(res)) {
548 res = JOIN_OBJ(res);
549 if (PyList_CheckExact(res)) {
550 res = list_join(res);
551 if (!res)
552 return NULL;
553 self->tail = res;
554 }
555 }
556
557 return res;
558}
559
560static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300561subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000562{
563 PyObject* elem;
564
565 ElementObject* parent;
566 PyObject* tag;
567 PyObject* attrib = NULL;
568 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
569 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800570 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000571 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800572 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573
Eli Bendersky737b1732012-05-29 06:02:56 +0300574 if (attrib) {
575 /* attrib passed as positional arg */
576 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 if (!attrib)
578 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300579 if (kwds) {
580 if (PyDict_Update(attrib, kwds) < 0) {
581 return NULL;
582 }
583 }
584 } else if (kwds) {
585 /* have keyword args */
586 attrib = get_attrib_from_keywords(kwds);
587 if (!attrib)
588 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000589 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300590 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000591 Py_INCREF(Py_None);
592 attrib = Py_None;
593 }
594
Eli Bendersky092af1f2012-03-04 07:14:03 +0200595 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000596 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200597 if (elem == NULL)
598 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000600 if (element_add_subelement(parent, elem) < 0) {
601 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000603 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000604
605 return elem;
606}
607
Eli Bendersky0192ba32012-03-30 16:38:33 +0300608static int
609element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
610{
611 Py_VISIT(self->tag);
612 Py_VISIT(JOIN_OBJ(self->text));
613 Py_VISIT(JOIN_OBJ(self->tail));
614
615 if (self->extra) {
616 int i;
617 Py_VISIT(self->extra->attrib);
618
619 for (i = 0; i < self->extra->length; ++i)
620 Py_VISIT(self->extra->children[i]);
621 }
622 return 0;
623}
624
625static int
626element_gc_clear(ElementObject *self)
627{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300628 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700629 _clear_joined_ptr(&self->text);
630 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300631
632 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300633 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300634 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300635 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636 return 0;
637}
638
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000639static void
640element_dealloc(ElementObject* self)
641{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300642 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300643
644 if (self->weakreflist != NULL)
645 PyObject_ClearWeakRefs((PyObject *) self);
646
Eli Bendersky0192ba32012-03-30 16:38:33 +0300647 /* element_gc_clear clears all references and deallocates extra
648 */
649 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000650
651 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200652 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000653}
654
655/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000656
657static PyObject*
658element_append(ElementObject* self, PyObject* args)
659{
660 PyObject* element;
661 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
662 return NULL;
663
664 if (element_add_subelement(self, element) < 0)
665 return NULL;
666
667 Py_RETURN_NONE;
668}
669
670static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300671element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000672{
673 if (!PyArg_ParseTuple(args, ":clear"))
674 return NULL;
675
Eli Benderskyebf37a22012-04-03 22:02:37 +0300676 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000677
678 Py_INCREF(Py_None);
679 Py_DECREF(JOIN_OBJ(self->text));
680 self->text = Py_None;
681
682 Py_INCREF(Py_None);
683 Py_DECREF(JOIN_OBJ(self->tail));
684 self->tail = Py_None;
685
686 Py_RETURN_NONE;
687}
688
689static PyObject*
690element_copy(ElementObject* self, PyObject* args)
691{
692 int i;
693 ElementObject* element;
694
695 if (!PyArg_ParseTuple(args, ":__copy__"))
696 return NULL;
697
Eli Bendersky092af1f2012-03-04 07:14:03 +0200698 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800699 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000700 if (!element)
701 return NULL;
702
703 Py_DECREF(JOIN_OBJ(element->text));
704 element->text = self->text;
705 Py_INCREF(JOIN_OBJ(element->text));
706
707 Py_DECREF(JOIN_OBJ(element->tail));
708 element->tail = self->tail;
709 Py_INCREF(JOIN_OBJ(element->tail));
710
711 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000712 if (element_resize(element, self->extra->length) < 0) {
713 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000714 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000715 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716
717 for (i = 0; i < self->extra->length; i++) {
718 Py_INCREF(self->extra->children[i]);
719 element->extra->children[i] = self->extra->children[i];
720 }
721
722 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723 }
724
725 return (PyObject*) element;
726}
727
728static PyObject*
729element_deepcopy(ElementObject* self, PyObject* args)
730{
731 int i;
732 ElementObject* element;
733 PyObject* tag;
734 PyObject* attrib;
735 PyObject* text;
736 PyObject* tail;
737 PyObject* id;
738
739 PyObject* memo;
740 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
741 return NULL;
742
743 tag = deepcopy(self->tag, memo);
744 if (!tag)
745 return NULL;
746
747 if (self->extra) {
748 attrib = deepcopy(self->extra->attrib, memo);
749 if (!attrib) {
750 Py_DECREF(tag);
751 return NULL;
752 }
753 } else {
754 Py_INCREF(Py_None);
755 attrib = Py_None;
756 }
757
Eli Bendersky092af1f2012-03-04 07:14:03 +0200758 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000759
760 Py_DECREF(tag);
761 Py_DECREF(attrib);
762
763 if (!element)
764 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100765
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000766 text = deepcopy(JOIN_OBJ(self->text), memo);
767 if (!text)
768 goto error;
769 Py_DECREF(element->text);
770 element->text = JOIN_SET(text, JOIN_GET(self->text));
771
772 tail = deepcopy(JOIN_OBJ(self->tail), memo);
773 if (!tail)
774 goto error;
775 Py_DECREF(element->tail);
776 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
777
778 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000779 if (element_resize(element, self->extra->length) < 0)
780 goto error;
781
782 for (i = 0; i < self->extra->length; i++) {
783 PyObject* child = deepcopy(self->extra->children[i], memo);
784 if (!child) {
785 element->extra->length = i;
786 goto error;
787 }
788 element->extra->children[i] = child;
789 }
790
791 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000792 }
793
794 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200795 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000796 if (!id)
797 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000798
799 i = PyDict_SetItem(memo, id, (PyObject*) element);
800
801 Py_DECREF(id);
802
803 if (i < 0)
804 goto error;
805
806 return (PyObject*) element;
807
808 error:
809 Py_DECREF(element);
810 return NULL;
811}
812
Martin v. Löwisbce16662012-06-17 10:41:22 +0200813static PyObject*
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200814element_sizeof(PyObject* myself, PyObject* args)
Martin v. Löwisbce16662012-06-17 10:41:22 +0200815{
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200816 ElementObject *self = (ElementObject*)myself;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200817 Py_ssize_t result = sizeof(ElementObject);
818 if (self->extra) {
819 result += sizeof(ElementObjectExtra);
820 if (self->extra->children != self->extra->_children)
821 result += sizeof(PyObject*) * self->extra->allocated;
822 }
823 return PyLong_FromSsize_t(result);
824}
825
/* Keys of the state dictionary exchanged by __getstate__/__setstate__. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
832
833/* __getstate__ returns a fabricated instance dict as in the pure-Python
834 * Element implementation, for interoperability/interchangeability. This
835 * makes the pure-Python implementation details an API, but (a) there aren't
836 * any unnecessary structures there; and (b) it buys compatibility with 3.2
837 * pickles. See issue #16076.
838 */
839static PyObject *
840element_getstate(ElementObject *self)
841{
842 int i, noattrib;
843 PyObject *instancedict = NULL, *children;
844
845 /* Build a list of children. */
846 children = PyList_New(self->extra ? self->extra->length : 0);
847 if (!children)
848 return NULL;
849 for (i = 0; i < PyList_GET_SIZE(children); i++) {
850 PyObject *child = self->extra->children[i];
851 Py_INCREF(child);
852 PyList_SET_ITEM(children, i, child);
853 }
854
855 /* Construct the state object. */
856 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
857 if (noattrib)
858 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
859 PICKLED_TAG, self->tag,
860 PICKLED_CHILDREN, children,
861 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700862 PICKLED_TEXT, JOIN_OBJ(self->text),
863 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800864 else
865 instancedict = Py_BuildValue("{sOsOsOsOsO}",
866 PICKLED_TAG, self->tag,
867 PICKLED_CHILDREN, children,
868 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700869 PICKLED_TEXT, JOIN_OBJ(self->text),
870 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800871 if (instancedict) {
872 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800873 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800874 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800875 else {
876 for (i = 0; i < PyList_GET_SIZE(children); i++)
877 Py_DECREF(PyList_GET_ITEM(children, i));
878 Py_DECREF(children);
879
880 return NULL;
881 }
882}
883
884static PyObject *
885element_setstate_from_attributes(ElementObject *self,
886 PyObject *tag,
887 PyObject *attrib,
888 PyObject *text,
889 PyObject *tail,
890 PyObject *children)
891{
892 Py_ssize_t i, nchildren;
893
894 if (!tag) {
895 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
896 return NULL;
897 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800898
899 Py_CLEAR(self->tag);
900 self->tag = tag;
901 Py_INCREF(self->tag);
902
Eli Benderskydd3661e2013-09-13 06:24:25 -0700903 _clear_joined_ptr(&self->text);
904 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
905 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800906
Eli Benderskydd3661e2013-09-13 06:24:25 -0700907 _clear_joined_ptr(&self->tail);
908 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
909 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800910
911 /* Handle ATTRIB and CHILDREN. */
912 if (!children && !attrib)
913 Py_RETURN_NONE;
914
915 /* Compute 'nchildren'. */
916 if (children) {
917 if (!PyList_Check(children)) {
918 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
919 return NULL;
920 }
921 nchildren = PyList_Size(children);
922 }
923 else {
924 nchildren = 0;
925 }
926
927 /* Allocate 'extra'. */
928 if (element_resize(self, nchildren)) {
929 return NULL;
930 }
931 assert(self->extra && self->extra->allocated >= nchildren);
932
933 /* Copy children */
934 for (i = 0; i < nchildren; i++) {
935 self->extra->children[i] = PyList_GET_ITEM(children, i);
936 Py_INCREF(self->extra->children[i]);
937 }
938
939 self->extra->length = nchildren;
940 self->extra->allocated = nchildren;
941
942 /* Stash attrib. */
943 if (attrib) {
944 Py_CLEAR(self->extra->attrib);
945 self->extra->attrib = attrib;
946 Py_INCREF(attrib);
947 }
948
949 Py_RETURN_NONE;
950}
951
952/* __setstate__ for Element instance from the Python implementation.
953 * 'state' should be the instance dict.
954 */
955static PyObject *
956element_setstate_from_Python(ElementObject *self, PyObject *state)
957{
958 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
959 PICKLED_TAIL, PICKLED_CHILDREN, 0};
960 PyObject *args;
961 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800962 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800963
Eli Bendersky698bdb22013-01-10 06:01:06 -0800964 tag = attrib = text = tail = children = NULL;
965 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800966 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800967 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800968
969 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
970 &attrib, &text, &tail, &children))
971 retval = element_setstate_from_attributes(self, tag, attrib, text,
972 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800973 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800974 retval = NULL;
975
976 Py_DECREF(args);
977 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800978}
979
980static PyObject *
981element_setstate(ElementObject *self, PyObject *state)
982{
983 if (!PyDict_CheckExact(state)) {
984 PyErr_Format(PyExc_TypeError,
985 "Don't know how to unpickle \"%.200R\" as an Element",
986 state);
987 return NULL;
988 }
989 else
990 return element_setstate_from_Python(self, state);
991}
992
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000993LOCAL(int)
994checkpath(PyObject* tag)
995{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000996 Py_ssize_t i;
997 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000998
999 /* check if a tag contains an xpath character */
1000
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001001#define PATHCHAR(ch) \
1002 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001003
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001004 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1006 void *data = PyUnicode_DATA(tag);
1007 unsigned int kind = PyUnicode_KIND(tag);
1008 for (i = 0; i < len; i++) {
1009 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1010 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001011 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001012 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001013 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001014 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001015 return 1;
1016 }
1017 return 0;
1018 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001019 if (PyBytes_Check(tag)) {
1020 char *p = PyBytes_AS_STRING(tag);
1021 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001022 if (p[i] == '{')
1023 check = 0;
1024 else if (p[i] == '}')
1025 check = 1;
1026 else if (check && PATHCHAR(p[i]))
1027 return 1;
1028 }
1029 return 0;
1030 }
1031
1032 return 1; /* unknown type; might be path expression */
1033}
1034
1035static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001036element_extend(ElementObject* self, PyObject* args)
1037{
1038 PyObject* seq;
1039 Py_ssize_t i, seqlen = 0;
1040
1041 PyObject* seq_in;
1042 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1043 return NULL;
1044
1045 seq = PySequence_Fast(seq_in, "");
1046 if (!seq) {
1047 PyErr_Format(
1048 PyExc_TypeError,
1049 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1050 );
1051 return NULL;
1052 }
1053
1054 seqlen = PySequence_Size(seq);
1055 for (i = 0; i < seqlen; i++) {
1056 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001057 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1058 Py_DECREF(seq);
1059 PyErr_Format(
1060 PyExc_TypeError,
1061 "expected an Element, not \"%.200s\"",
1062 Py_TYPE(element)->tp_name);
1063 return NULL;
1064 }
1065
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001066 if (element_add_subelement(self, element) < 0) {
1067 Py_DECREF(seq);
1068 return NULL;
1069 }
1070 }
1071
1072 Py_DECREF(seq);
1073
1074 Py_RETURN_NONE;
1075}
1076
1077static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001078element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001079{
1080 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001082 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001083 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001084 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001085
Eli Bendersky737b1732012-05-29 06:02:56 +03001086 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1087 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 return NULL;
1089
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001090 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001091 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001092 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001093 st->elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001094 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001095 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001096
1097 if (!self->extra)
1098 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001099
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001100 for (i = 0; i < self->extra->length; i++) {
1101 PyObject* item = self->extra->children[i];
1102 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001103 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001104 Py_INCREF(item);
1105 return item;
1106 }
1107 }
1108
1109 Py_RETURN_NONE;
1110}
1111
1112static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001113element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001114{
1115 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001116 PyObject* tag;
1117 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001118 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001119 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001120 static char *kwlist[] = {"path", "default", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001121 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001122
Eli Bendersky737b1732012-05-29 06:02:56 +03001123 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1124 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001125 return NULL;
1126
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001127 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001128 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001129 st->elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001130 );
1131
1132 if (!self->extra) {
1133 Py_INCREF(default_value);
1134 return default_value;
1135 }
1136
1137 for (i = 0; i < self->extra->length; i++) {
1138 ElementObject* item = (ElementObject*) self->extra->children[i];
Eli Bendersky163d7f02013-11-24 06:55:04 -08001139 if (Element_CheckExact(item) &&
1140 (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001141 PyObject* text = element_get_text(item);
1142 if (text == Py_None)
Eli Bendersky25771b32013-01-13 05:26:07 -08001143 return PyUnicode_New(0, 0);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001144 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001145 return text;
1146 }
1147 }
1148
1149 Py_INCREF(default_value);
1150 return default_value;
1151}
1152
1153static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001154element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001155{
1156 int i;
1157 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001158 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001159 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001160 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001161 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001162
Eli Bendersky737b1732012-05-29 06:02:56 +03001163 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1164 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001165 return NULL;
1166
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001167 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001168 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001169 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001170 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001171 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001172 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001173
1174 out = PyList_New(0);
1175 if (!out)
1176 return NULL;
1177
1178 if (!self->extra)
1179 return out;
1180
1181 for (i = 0; i < self->extra->length; i++) {
1182 PyObject* item = self->extra->children[i];
1183 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001184 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001185 if (PyList_Append(out, item) < 0) {
1186 Py_DECREF(out);
1187 return NULL;
1188 }
1189 }
1190 }
1191
1192 return out;
1193}
1194
1195static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001196element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001197{
1198 PyObject* tag;
1199 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001200 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001201 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001202 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001203
Eli Bendersky737b1732012-05-29 06:02:56 +03001204 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
Eli Bendersky163d7f02013-11-24 06:55:04 -08001205 &tag, &namespaces)) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001206 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -08001207 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001208
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001209 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001210 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001211}
1212
1213static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001214element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001215{
1216 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001217 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001218
1219 PyObject* key;
1220 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001221
1222 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1223 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001224 return NULL;
1225
1226 if (!self->extra || self->extra->attrib == Py_None)
1227 value = default_value;
1228 else {
1229 value = PyDict_GetItem(self->extra->attrib, key);
1230 if (!value)
1231 value = default_value;
1232 }
1233
1234 Py_INCREF(value);
1235 return value;
1236}
1237
1238static PyObject*
1239element_getchildren(ElementObject* self, PyObject* args)
1240{
1241 int i;
1242 PyObject* list;
1243
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001244 /* FIXME: report as deprecated? */
1245
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001246 if (!PyArg_ParseTuple(args, ":getchildren"))
1247 return NULL;
1248
1249 if (!self->extra)
1250 return PyList_New(0);
1251
1252 list = PyList_New(self->extra->length);
1253 if (!list)
1254 return NULL;
1255
1256 for (i = 0; i < self->extra->length; i++) {
1257 PyObject* item = self->extra->children[i];
1258 Py_INCREF(item);
1259 PyList_SET_ITEM(list, i, item);
1260 }
1261
1262 return list;
1263}
1264
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001265
Eli Bendersky64d11e62012-06-15 07:42:50 +03001266static PyObject *
1267create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1268
1269
1270static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001271element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001272{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001273 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001274 static char* kwlist[] = {"tag", 0};
1275
1276 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277 return NULL;
1278
Eli Bendersky64d11e62012-06-15 07:42:50 +03001279 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001280}
1281
1282
1283static PyObject*
1284element_itertext(ElementObject* self, PyObject* args)
1285{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001286 if (!PyArg_ParseTuple(args, ":itertext"))
1287 return NULL;
1288
Eli Bendersky64d11e62012-06-15 07:42:50 +03001289 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001290}
1291
Eli Bendersky64d11e62012-06-15 07:42:50 +03001292
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001293static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001294element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001295{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001296 ElementObject* self = (ElementObject*) self_;
1297
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001298 if (!self->extra || index < 0 || index >= self->extra->length) {
1299 PyErr_SetString(
1300 PyExc_IndexError,
1301 "child index out of range"
1302 );
1303 return NULL;
1304 }
1305
1306 Py_INCREF(self->extra->children[index]);
1307 return self->extra->children[index];
1308}
1309
1310static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001311element_insert(ElementObject* self, PyObject* args)
1312{
1313 int i;
1314
1315 int index;
1316 PyObject* element;
1317 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1318 &Element_Type, &element))
1319 return NULL;
1320
Victor Stinner5f0af232013-07-11 23:01:36 +02001321 if (!self->extra) {
1322 if (create_extra(self, NULL) < 0)
1323 return NULL;
1324 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001325
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001326 if (index < 0) {
1327 index += self->extra->length;
1328 if (index < 0)
1329 index = 0;
1330 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001331 if (index > self->extra->length)
1332 index = self->extra->length;
1333
1334 if (element_resize(self, 1) < 0)
1335 return NULL;
1336
1337 for (i = self->extra->length; i > index; i--)
1338 self->extra->children[i] = self->extra->children[i-1];
1339
1340 Py_INCREF(element);
1341 self->extra->children[index] = element;
1342
1343 self->extra->length++;
1344
1345 Py_RETURN_NONE;
1346}
1347
1348static PyObject*
1349element_items(ElementObject* self, PyObject* args)
1350{
1351 if (!PyArg_ParseTuple(args, ":items"))
1352 return NULL;
1353
1354 if (!self->extra || self->extra->attrib == Py_None)
1355 return PyList_New(0);
1356
1357 return PyDict_Items(self->extra->attrib);
1358}
1359
1360static PyObject*
1361element_keys(ElementObject* self, PyObject* args)
1362{
1363 if (!PyArg_ParseTuple(args, ":keys"))
1364 return NULL;
1365
1366 if (!self->extra || self->extra->attrib == Py_None)
1367 return PyList_New(0);
1368
1369 return PyDict_Keys(self->extra->attrib);
1370}
1371
Martin v. Löwis18e16552006-02-15 17:27:45 +00001372static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001373element_length(ElementObject* self)
1374{
1375 if (!self->extra)
1376 return 0;
1377
1378 return self->extra->length;
1379}
1380
1381static PyObject*
1382element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1383{
1384 PyObject* elem;
1385
1386 PyObject* tag;
1387 PyObject* attrib;
1388 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1389 return NULL;
1390
1391 attrib = PyDict_Copy(attrib);
1392 if (!attrib)
1393 return NULL;
1394
Eli Bendersky092af1f2012-03-04 07:14:03 +02001395 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001396
1397 Py_DECREF(attrib);
1398
1399 return elem;
1400}
1401
1402static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001403element_remove(ElementObject* self, PyObject* args)
1404{
1405 int i;
1406
1407 PyObject* element;
1408 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1409 return NULL;
1410
1411 if (!self->extra) {
1412 /* element has no children, so raise exception */
1413 PyErr_SetString(
1414 PyExc_ValueError,
1415 "list.remove(x): x not in list"
1416 );
1417 return NULL;
1418 }
1419
1420 for (i = 0; i < self->extra->length; i++) {
1421 if (self->extra->children[i] == element)
1422 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001423 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001424 break;
1425 }
1426
1427 if (i == self->extra->length) {
1428 /* element is not in children, so raise exception */
1429 PyErr_SetString(
1430 PyExc_ValueError,
1431 "list.remove(x): x not in list"
1432 );
1433 return NULL;
1434 }
1435
1436 Py_DECREF(self->extra->children[i]);
1437
1438 self->extra->length--;
1439
1440 for (; i < self->extra->length; i++)
1441 self->extra->children[i] = self->extra->children[i+1];
1442
1443 Py_RETURN_NONE;
1444}
1445
1446static PyObject*
1447element_repr(ElementObject* self)
1448{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001449 if (self->tag)
1450 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1451 else
1452 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001453}
1454
1455static PyObject*
1456element_set(ElementObject* self, PyObject* args)
1457{
1458 PyObject* attrib;
1459
1460 PyObject* key;
1461 PyObject* value;
1462 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1463 return NULL;
1464
Victor Stinner5f0af232013-07-11 23:01:36 +02001465 if (!self->extra) {
1466 if (create_extra(self, NULL) < 0)
1467 return NULL;
1468 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001469
1470 attrib = element_get_attrib(self);
1471 if (!attrib)
1472 return NULL;
1473
1474 if (PyDict_SetItem(attrib, key, value) < 0)
1475 return NULL;
1476
1477 Py_RETURN_NONE;
1478}
1479
1480static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001481element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001482{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001483 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001484 int i;
1485 PyObject* old;
1486
1487 if (!self->extra || index < 0 || index >= self->extra->length) {
1488 PyErr_SetString(
1489 PyExc_IndexError,
1490 "child assignment index out of range");
1491 return -1;
1492 }
1493
1494 old = self->extra->children[index];
1495
1496 if (item) {
1497 Py_INCREF(item);
1498 self->extra->children[index] = item;
1499 } else {
1500 self->extra->length--;
1501 for (i = index; i < self->extra->length; i++)
1502 self->extra->children[i] = self->extra->children[i+1];
1503 }
1504
1505 Py_DECREF(old);
1506
1507 return 0;
1508}
1509
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001510static PyObject*
1511element_subscr(PyObject* self_, PyObject* item)
1512{
1513 ElementObject* self = (ElementObject*) self_;
1514
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001515 if (PyIndex_Check(item)) {
1516 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001517
1518 if (i == -1 && PyErr_Occurred()) {
1519 return NULL;
1520 }
1521 if (i < 0 && self->extra)
1522 i += self->extra->length;
1523 return element_getitem(self_, i);
1524 }
1525 else if (PySlice_Check(item)) {
1526 Py_ssize_t start, stop, step, slicelen, cur, i;
1527 PyObject* list;
1528
1529 if (!self->extra)
1530 return PyList_New(0);
1531
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001532 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001533 self->extra->length,
1534 &start, &stop, &step, &slicelen) < 0) {
1535 return NULL;
1536 }
1537
1538 if (slicelen <= 0)
1539 return PyList_New(0);
1540 else {
1541 list = PyList_New(slicelen);
1542 if (!list)
1543 return NULL;
1544
1545 for (cur = start, i = 0; i < slicelen;
1546 cur += step, i++) {
1547 PyObject* item = self->extra->children[cur];
1548 Py_INCREF(item);
1549 PyList_SET_ITEM(list, i, item);
1550 }
1551
1552 return list;
1553 }
1554 }
1555 else {
1556 PyErr_SetString(PyExc_TypeError,
1557 "element indices must be integers");
1558 return NULL;
1559 }
1560}
1561
1562static int
1563element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1564{
1565 ElementObject* self = (ElementObject*) self_;
1566
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001567 if (PyIndex_Check(item)) {
1568 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001569
1570 if (i == -1 && PyErr_Occurred()) {
1571 return -1;
1572 }
1573 if (i < 0 && self->extra)
1574 i += self->extra->length;
1575 return element_setitem(self_, i, value);
1576 }
1577 else if (PySlice_Check(item)) {
1578 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1579
1580 PyObject* recycle = NULL;
1581 PyObject* seq = NULL;
1582
Victor Stinner5f0af232013-07-11 23:01:36 +02001583 if (!self->extra) {
1584 if (create_extra(self, NULL) < 0)
1585 return -1;
1586 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001587
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001588 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001589 self->extra->length,
1590 &start, &stop, &step, &slicelen) < 0) {
1591 return -1;
1592 }
1593
Eli Bendersky865756a2012-03-09 13:38:15 +02001594 if (value == NULL) {
1595 /* Delete slice */
1596 size_t cur;
1597 Py_ssize_t i;
1598
1599 if (slicelen <= 0)
1600 return 0;
1601
1602 /* Since we're deleting, the direction of the range doesn't matter,
1603 * so for simplicity make it always ascending.
1604 */
1605 if (step < 0) {
1606 stop = start + 1;
1607 start = stop + step * (slicelen - 1) - 1;
1608 step = -step;
1609 }
1610
1611 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1612
1613 /* recycle is a list that will contain all the children
1614 * scheduled for removal.
1615 */
1616 if (!(recycle = PyList_New(slicelen))) {
1617 PyErr_NoMemory();
1618 return -1;
1619 }
1620
1621 /* This loop walks over all the children that have to be deleted,
1622 * with cur pointing at them. num_moved is the amount of children
1623 * until the next deleted child that have to be "shifted down" to
1624 * occupy the deleted's places.
1625 * Note that in the ith iteration, shifting is done i+i places down
1626 * because i children were already removed.
1627 */
1628 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1629 /* Compute how many children have to be moved, clipping at the
1630 * list end.
1631 */
1632 Py_ssize_t num_moved = step - 1;
1633 if (cur + step >= (size_t)self->extra->length) {
1634 num_moved = self->extra->length - cur - 1;
1635 }
1636
1637 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1638
1639 memmove(
1640 self->extra->children + cur - i,
1641 self->extra->children + cur + 1,
1642 num_moved * sizeof(PyObject *));
1643 }
1644
1645 /* Leftover "tail" after the last removed child */
1646 cur = start + (size_t)slicelen * step;
1647 if (cur < (size_t)self->extra->length) {
1648 memmove(
1649 self->extra->children + cur - slicelen,
1650 self->extra->children + cur,
1651 (self->extra->length - cur) * sizeof(PyObject *));
1652 }
1653
1654 self->extra->length -= slicelen;
1655
1656 /* Discard the recycle list with all the deleted sub-elements */
1657 Py_XDECREF(recycle);
1658 return 0;
1659 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001660 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001661 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001662 seq = PySequence_Fast(value, "");
1663 if (!seq) {
1664 PyErr_Format(
1665 PyExc_TypeError,
1666 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1667 );
1668 return -1;
1669 }
1670 newlen = PySequence_Size(seq);
1671 }
1672
1673 if (step != 1 && newlen != slicelen)
1674 {
1675 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001676 "attempt to assign sequence of size %zd "
1677 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001678 newlen, slicelen
1679 );
1680 return -1;
1681 }
1682
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001683 /* Resize before creating the recycle bin, to prevent refleaks. */
1684 if (newlen > slicelen) {
1685 if (element_resize(self, newlen - slicelen) < 0) {
1686 if (seq) {
1687 Py_DECREF(seq);
1688 }
1689 return -1;
1690 }
1691 }
1692
1693 if (slicelen > 0) {
1694 /* to avoid recursive calls to this method (via decref), move
1695 old items to the recycle bin here, and get rid of them when
1696 we're done modifying the element */
1697 recycle = PyList_New(slicelen);
1698 if (!recycle) {
1699 if (seq) {
1700 Py_DECREF(seq);
1701 }
1702 return -1;
1703 }
1704 for (cur = start, i = 0; i < slicelen;
1705 cur += step, i++)
1706 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1707 }
1708
1709 if (newlen < slicelen) {
1710 /* delete slice */
1711 for (i = stop; i < self->extra->length; i++)
1712 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1713 } else if (newlen > slicelen) {
1714 /* insert slice */
1715 for (i = self->extra->length-1; i >= stop; i--)
1716 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1717 }
1718
1719 /* replace the slice */
1720 for (cur = start, i = 0; i < newlen;
1721 cur += step, i++) {
1722 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1723 Py_INCREF(element);
1724 self->extra->children[cur] = element;
1725 }
1726
1727 self->extra->length += newlen - slicelen;
1728
1729 if (seq) {
1730 Py_DECREF(seq);
1731 }
1732
1733 /* discard the recycle bin, and everything in it */
1734 Py_XDECREF(recycle);
1735
1736 return 0;
1737 }
1738 else {
1739 PyErr_SetString(PyExc_TypeError,
1740 "element indices must be integers");
1741 return -1;
1742 }
1743}
1744
/* Method table for Element objects: Python-level method name, the C
 * function implementing it, and its calling-convention flags.  The table
 * is terminated by a NULL entry.
 */
static PyMethodDef element_methods[] = {

    {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},

    /* attribute access */
    {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
    {"set", (PyCFunction) element_set, METH_VARARGS},

    /* searching among children */
    {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
    {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
    {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},

    /* modifying the child list */
    {"append", (PyCFunction) element_append, METH_VARARGS},
    {"extend", (PyCFunction) element_extend, METH_VARARGS},
    {"insert", (PyCFunction) element_insert, METH_VARARGS},
    {"remove", (PyCFunction) element_remove, METH_VARARGS},

    /* iteration */
    {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
    {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
    {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},

    /* "getiterator" is served by the same function as "iter" */
    {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
    {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},

    {"items", (PyCFunction) element_items, METH_VARARGS},
    {"keys", (PyCFunction) element_keys, METH_VARARGS},

    {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},

    /* copy, sizeof and pickle protocol hooks */
    {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
    {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
    {"__sizeof__", element_sizeof, METH_NOARGS},
    {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
    {"__setstate__", (PyCFunction)element_setstate, METH_O},

    {NULL, NULL}
};
1781
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001782static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001783element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001784{
1785 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001786 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001787
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001788 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001789 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001790
Alexander Belopolskye239d232010-12-08 23:31:48 +00001791 if (name == NULL)
1792 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001793
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001794 /* handle common attributes first */
1795 if (strcmp(name, "tag") == 0) {
1796 res = self->tag;
1797 Py_INCREF(res);
1798 return res;
1799 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001800 res = element_get_text(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02001801 Py_XINCREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001802 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001803 }
1804
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001805 /* methods */
1806 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1807 if (res)
1808 return res;
1809
1810 /* less common attributes */
1811 if (strcmp(name, "tail") == 0) {
1812 PyErr_Clear();
1813 res = element_get_tail(self);
1814 } else if (strcmp(name, "attrib") == 0) {
1815 PyErr_Clear();
Victor Stinner5f0af232013-07-11 23:01:36 +02001816 if (!self->extra) {
1817 if (create_extra(self, NULL) < 0)
1818 return NULL;
1819 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001820 res = element_get_attrib(self);
1821 }
1822
1823 if (!res)
1824 return NULL;
1825
1826 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001827 return res;
1828}
1829
Eli Benderskyef9683b2013-05-18 07:52:34 -07001830static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001831element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001832{
Eli Benderskyb20df952012-05-20 06:33:29 +03001833 char *name = "";
1834 if (PyUnicode_Check(nameobj))
1835 name = _PyUnicode_AsString(nameobj);
Victor Stinner4d463432013-07-11 23:05:03 +02001836 if (name == NULL)
Eli Benderskyef9683b2013-05-18 07:52:34 -07001837 return -1;
Victor Stinner4d463432013-07-11 23:05:03 +02001838
1839 if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001840 Py_DECREF(self->tag);
1841 self->tag = value;
1842 Py_INCREF(self->tag);
1843 } else if (strcmp(name, "text") == 0) {
1844 Py_DECREF(JOIN_OBJ(self->text));
1845 self->text = value;
1846 Py_INCREF(self->text);
1847 } else if (strcmp(name, "tail") == 0) {
1848 Py_DECREF(JOIN_OBJ(self->tail));
1849 self->tail = value;
1850 Py_INCREF(self->tail);
1851 } else if (strcmp(name, "attrib") == 0) {
Victor Stinner5f0af232013-07-11 23:01:36 +02001852 if (!self->extra) {
1853 if (create_extra(self, NULL) < 0)
1854 return -1;
1855 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001856 Py_DECREF(self->extra->attrib);
1857 self->extra->attrib = value;
1858 Py_INCREF(self->extra->attrib);
1859 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001860 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001861 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001862 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001863 }
1864
Eli Benderskyef9683b2013-05-18 07:52:34 -07001865 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001866}
1867
1868static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001869 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001870 0, /* sq_concat */
1871 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001872 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001873 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001874 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001875 0,
1876};
1877
1878static PyMappingMethods element_as_mapping = {
1879 (lenfunc) element_length,
1880 (binaryfunc) element_subscr,
1881 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001882};
1883
Neal Norwitz227b5332006-03-22 09:28:35 +00001884static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001885 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001886 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001887 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001888 (destructor)element_dealloc, /* tp_dealloc */
1889 0, /* tp_print */
1890 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001891 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001892 0, /* tp_reserved */
1893 (reprfunc)element_repr, /* tp_repr */
1894 0, /* tp_as_number */
1895 &element_as_sequence, /* tp_as_sequence */
1896 &element_as_mapping, /* tp_as_mapping */
1897 0, /* tp_hash */
1898 0, /* tp_call */
1899 0, /* tp_str */
1900 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001901 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001902 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001903 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1904 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001905 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001906 (traverseproc)element_gc_traverse, /* tp_traverse */
1907 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001908 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001909 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001910 0, /* tp_iter */
1911 0, /* tp_iternext */
1912 element_methods, /* tp_methods */
1913 0, /* tp_members */
1914 0, /* tp_getset */
1915 0, /* tp_base */
1916 0, /* tp_dict */
1917 0, /* tp_descr_get */
1918 0, /* tp_descr_set */
1919 0, /* tp_dictoffset */
1920 (initproc)element_init, /* tp_init */
1921 PyType_GenericAlloc, /* tp_alloc */
1922 element_new, /* tp_new */
1923 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001924};
1925
Eli Bendersky64d11e62012-06-15 07:42:50 +03001926/******************************* Element iterator ****************************/
1927
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a singly-linked list starting at parent_stack.
 * Each stack node contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
1936typedef struct ParentLocator_t {
1937 ElementObject *parent;
1938 Py_ssize_t child_index;
1939 struct ParentLocator_t *next;
1940} ParentLocator;
1941
1942typedef struct {
1943 PyObject_HEAD
1944 ParentLocator *parent_stack;
1945 ElementObject *root_element;
1946 PyObject *sought_tag;
1947 int root_done;
1948 int gettext;
1949} ElementIterObject;
1950
1951
1952static void
1953elementiter_dealloc(ElementIterObject *it)
1954{
1955 ParentLocator *p = it->parent_stack;
1956 while (p) {
1957 ParentLocator *temp = p;
1958 Py_XDECREF(p->parent);
1959 p = p->next;
1960 PyObject_Free(temp);
1961 }
1962
1963 Py_XDECREF(it->sought_tag);
1964 Py_XDECREF(it->root_element);
1965
1966 PyObject_GC_UnTrack(it);
1967 PyObject_GC_Del(it);
1968}
1969
1970static int
1971elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1972{
1973 ParentLocator *p = it->parent_stack;
1974 while (p) {
1975 Py_VISIT(p->parent);
1976 p = p->next;
1977 }
1978
1979 Py_VISIT(it->root_element);
1980 Py_VISIT(it->sought_tag);
1981 return 0;
1982}
1983
1984/* Helper function for elementiter_next. Add a new parent to the parent stack.
1985 */
1986static ParentLocator *
1987parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1988{
1989 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1990 if (new_node) {
1991 new_node->parent = parent;
1992 Py_INCREF(parent);
1993 new_node->child_index = 0;
1994 new_node->next = stack;
1995 }
1996 return new_node;
1997}
1998
1999static PyObject *
2000elementiter_next(ElementIterObject *it)
2001{
2002 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002003 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002004 * A short note on gettext: this function serves both the iter() and
2005 * itertext() methods to avoid code duplication. However, there are a few
2006 * small differences in the way these iterations work. Namely:
2007 * - itertext() only yields text from nodes that have it, and continues
2008 * iterating when a node doesn't have text (so it doesn't return any
2009 * node like iter())
2010 * - itertext() also has to handle tail, after finishing with all the
2011 * children of a node.
2012 */
Eli Bendersky113da642012-06-15 07:52:49 +03002013 ElementObject *cur_parent;
2014 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002015
2016 while (1) {
2017 /* Handle the case reached in the beginning and end of iteration, where
2018 * the parent stack is empty. The root_done flag gives us indication
2019 * whether we've just started iterating (so root_done is 0), in which
2020 * case the root is returned. If root_done is 1 and we're here, the
2021 * iterator is exhausted.
2022 */
2023 if (!it->parent_stack->parent) {
2024 if (it->root_done) {
2025 PyErr_SetNone(PyExc_StopIteration);
2026 return NULL;
2027 } else {
2028 it->parent_stack = parent_stack_push_new(it->parent_stack,
2029 it->root_element);
2030 if (!it->parent_stack) {
2031 PyErr_NoMemory();
2032 return NULL;
2033 }
2034
2035 it->root_done = 1;
2036 if (it->sought_tag == Py_None ||
2037 PyObject_RichCompareBool(it->root_element->tag,
2038 it->sought_tag, Py_EQ) == 1) {
2039 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002040 PyObject *text = element_get_text(it->root_element);
2041 if (!text)
2042 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002043 if (PyObject_IsTrue(text)) {
2044 Py_INCREF(text);
2045 return text;
2046 }
2047 } else {
2048 Py_INCREF(it->root_element);
2049 return (PyObject *)it->root_element;
2050 }
2051 }
2052 }
2053 }
2054
2055 /* See if there are children left to traverse in the current parent. If
2056 * yes, visit the next child. If not, pop the stack and try again.
2057 */
Eli Bendersky113da642012-06-15 07:52:49 +03002058 cur_parent = it->parent_stack->parent;
2059 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002060 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2061 ElementObject *child = (ElementObject *)
2062 cur_parent->extra->children[child_index];
2063 it->parent_stack->child_index++;
2064 it->parent_stack = parent_stack_push_new(it->parent_stack,
2065 child);
2066 if (!it->parent_stack) {
2067 PyErr_NoMemory();
2068 return NULL;
2069 }
2070
2071 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002072 PyObject *text = element_get_text(child);
2073 if (!text)
2074 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002075 if (PyObject_IsTrue(text)) {
2076 Py_INCREF(text);
2077 return text;
2078 }
2079 } else if (it->sought_tag == Py_None ||
2080 PyObject_RichCompareBool(child->tag,
2081 it->sought_tag, Py_EQ) == 1) {
2082 Py_INCREF(child);
2083 return (PyObject *)child;
2084 }
2085 else
2086 continue;
2087 }
2088 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002089 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002090 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002091 if (it->gettext) {
2092 tail = element_get_tail(cur_parent);
2093 if (!tail)
2094 return NULL;
2095 }
2096 else
2097 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002098 Py_XDECREF(it->parent_stack->parent);
2099 PyObject_Free(it->parent_stack);
2100 it->parent_stack = next;
2101
2102 /* Note that extra condition on it->parent_stack->parent here;
2103 * this is because itertext() is supposed to only return *inner*
2104 * text, not text following the element it began iteration with.
2105 */
2106 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2107 Py_INCREF(tail);
2108 return tail;
2109 }
2110 }
2111 }
2112
2113 return NULL;
2114}
2115
2116
2117static PyTypeObject ElementIter_Type = {
2118 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002119 /* Using the module's name since the pure-Python implementation does not
2120 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002121 "_elementtree._element_iterator", /* tp_name */
2122 sizeof(ElementIterObject), /* tp_basicsize */
2123 0, /* tp_itemsize */
2124 /* methods */
2125 (destructor)elementiter_dealloc, /* tp_dealloc */
2126 0, /* tp_print */
2127 0, /* tp_getattr */
2128 0, /* tp_setattr */
2129 0, /* tp_reserved */
2130 0, /* tp_repr */
2131 0, /* tp_as_number */
2132 0, /* tp_as_sequence */
2133 0, /* tp_as_mapping */
2134 0, /* tp_hash */
2135 0, /* tp_call */
2136 0, /* tp_str */
2137 0, /* tp_getattro */
2138 0, /* tp_setattro */
2139 0, /* tp_as_buffer */
2140 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2141 0, /* tp_doc */
2142 (traverseproc)elementiter_traverse, /* tp_traverse */
2143 0, /* tp_clear */
2144 0, /* tp_richcompare */
2145 0, /* tp_weaklistoffset */
2146 PyObject_SelfIter, /* tp_iter */
2147 (iternextfunc)elementiter_next, /* tp_iternext */
2148 0, /* tp_methods */
2149 0, /* tp_members */
2150 0, /* tp_getset */
2151 0, /* tp_base */
2152 0, /* tp_dict */
2153 0, /* tp_descr_get */
2154 0, /* tp_descr_set */
2155 0, /* tp_dictoffset */
2156 0, /* tp_init */
2157 0, /* tp_alloc */
2158 0, /* tp_new */
2159};
2160
2161
2162static PyObject *
2163create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2164{
2165 ElementIterObject *it;
2166 PyObject *star = NULL;
2167
2168 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2169 if (!it)
2170 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002171
2172 if (PyUnicode_Check(tag))
2173 star = PyUnicode_FromString("*");
2174 else if (PyBytes_Check(tag))
2175 star = PyBytes_FromString("*");
2176
2177 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2178 tag = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002179 Py_XDECREF(star);
Victor Stinner4d463432013-07-11 23:05:03 +02002180
2181 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002182 it->sought_tag = tag;
2183 it->root_done = 0;
2184 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002185 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002186 it->root_element = self;
2187
Eli Bendersky64d11e62012-06-15 07:42:50 +03002188 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002189
2190 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2191 if (it->parent_stack == NULL) {
2192 Py_DECREF(it);
2193 PyErr_NoMemory();
2194 return NULL;
2195 }
2196 it->parent_stack->parent = NULL;
2197 it->parent_stack->child_index = 0;
2198 it->parent_stack->next = NULL;
2199
Eli Bendersky64d11e62012-06-15 07:42:50 +03002200 return (PyObject *)it;
2201}
2202
2203
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002204/* ==================================================================== */
2205/* the tree builder type */
2206
2207typedef struct {
2208 PyObject_HEAD
2209
Eli Bendersky58d548d2012-05-29 15:45:16 +03002210 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002211
Antoine Pitrouee329312012-10-04 19:53:29 +02002212 PyObject *this; /* current node */
2213 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002214
Eli Bendersky58d548d2012-05-29 15:45:16 +03002215 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002216
Eli Bendersky58d548d2012-05-29 15:45:16 +03002217 PyObject *stack; /* element stack */
2218 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002219
Eli Bendersky48d358b2012-05-30 17:57:50 +03002220 PyObject *element_factory;
2221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002222 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002223 PyObject *events; /* list of events, or NULL if not collecting */
2224 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2225 PyObject *end_event_obj;
2226 PyObject *start_ns_event_obj;
2227 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002228} TreeBuilderObject;
2229
Christian Heimes90aa7642007-12-19 02:45:37 +00002230#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002231
2232/* -------------------------------------------------------------------- */
2233/* constructor and destructor */
2234
Eli Bendersky58d548d2012-05-29 15:45:16 +03002235static PyObject *
2236treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002237{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002238 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2239 if (t != NULL) {
2240 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002241
Eli Bendersky58d548d2012-05-29 15:45:16 +03002242 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002243 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002244 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002245 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002246
Eli Bendersky58d548d2012-05-29 15:45:16 +03002247 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002248 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002249 t->stack = PyList_New(20);
2250 if (!t->stack) {
2251 Py_DECREF(t->this);
2252 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002253 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002254 return NULL;
2255 }
2256 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002257
Eli Bendersky58d548d2012-05-29 15:45:16 +03002258 t->events = NULL;
2259 t->start_event_obj = t->end_event_obj = NULL;
2260 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2261 }
2262 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002263}
2264
Eli Bendersky58d548d2012-05-29 15:45:16 +03002265static int
2266treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002267{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002268 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002269 PyObject *element_factory = NULL;
2270 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002271 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002272
2273 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2274 &element_factory)) {
2275 return -1;
2276 }
2277
2278 if (element_factory) {
2279 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002280 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002281 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002282 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002283 }
2284
Eli Bendersky58d548d2012-05-29 15:45:16 +03002285 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002286}
2287
Eli Bendersky48d358b2012-05-30 17:57:50 +03002288static int
2289treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2290{
2291 Py_VISIT(self->root);
2292 Py_VISIT(self->this);
2293 Py_VISIT(self->last);
2294 Py_VISIT(self->data);
2295 Py_VISIT(self->stack);
2296 Py_VISIT(self->element_factory);
2297 return 0;
2298}
2299
2300static int
2301treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002302{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002303 Py_CLEAR(self->end_ns_event_obj);
2304 Py_CLEAR(self->start_ns_event_obj);
2305 Py_CLEAR(self->end_event_obj);
2306 Py_CLEAR(self->start_event_obj);
2307 Py_CLEAR(self->events);
2308 Py_CLEAR(self->stack);
2309 Py_CLEAR(self->data);
2310 Py_CLEAR(self->last);
2311 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002312 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002313 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002314 return 0;
2315}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002316
Eli Bendersky48d358b2012-05-30 17:57:50 +03002317static void
2318treebuilder_dealloc(TreeBuilderObject *self)
2319{
2320 PyObject_GC_UnTrack(self);
2321 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002322 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002323}
2324
2325/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002326/* helpers for handling of arbitrary element-like objects */
2327
2328static int
2329treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2330 PyObject **dest, _Py_Identifier *name)
2331{
2332 if (Element_CheckExact(element)) {
2333 Py_DECREF(JOIN_OBJ(*dest));
2334 *dest = JOIN_SET(data, PyList_CheckExact(data));
2335 return 0;
2336 }
2337 else {
2338 PyObject *joined = list_join(data);
2339 int r;
2340 if (joined == NULL)
2341 return -1;
2342 r = _PyObject_SetAttrId(element, name, joined);
2343 Py_DECREF(joined);
2344 return r;
2345 }
2346}
2347
2348/* These two functions steal a reference to data */
2349static int
2350treebuilder_set_element_text(PyObject *element, PyObject *data)
2351{
2352 _Py_IDENTIFIER(text);
2353 return treebuilder_set_element_text_or_tail(
2354 element, data, &((ElementObject *) element)->text, &PyId_text);
2355}
2356
2357static int
2358treebuilder_set_element_tail(PyObject *element, PyObject *data)
2359{
2360 _Py_IDENTIFIER(tail);
2361 return treebuilder_set_element_text_or_tail(
2362 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2363}
2364
2365static int
2366treebuilder_add_subelement(PyObject *element, PyObject *child)
2367{
2368 _Py_IDENTIFIER(append);
2369 if (Element_CheckExact(element)) {
2370 ElementObject *elem = (ElementObject *) element;
2371 return element_add_subelement(elem, child);
2372 }
2373 else {
2374 PyObject *res;
2375 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2376 if (res == NULL)
2377 return -1;
2378 Py_DECREF(res);
2379 return 0;
2380 }
2381}
2382
2383/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002384/* handlers */
2385
2386LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002387treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2388 PyObject* attrib)
2389{
2390 PyObject* node;
2391 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002392 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002393
2394 if (self->data) {
2395 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002396 if (treebuilder_set_element_text(self->last, self->data))
2397 return NULL;
2398 }
2399 else {
2400 if (treebuilder_set_element_tail(self->last, self->data))
2401 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002402 }
2403 self->data = NULL;
2404 }
2405
Eli Bendersky08231a92013-05-18 15:47:16 -07002406 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002407 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2408 } else {
2409 node = create_new_element(tag, attrib);
2410 }
2411 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002412 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002413 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002414
Antoine Pitrouee329312012-10-04 19:53:29 +02002415 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002416
2417 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002418 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002419 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002420 } else {
2421 if (self->root) {
2422 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002423 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002424 "multiple elements on top level"
2425 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002426 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002427 }
2428 Py_INCREF(node);
2429 self->root = node;
2430 }
2431
2432 if (self->index < PyList_GET_SIZE(self->stack)) {
2433 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002434 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002435 Py_INCREF(this);
2436 } else {
2437 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002438 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002439 }
2440 self->index++;
2441
2442 Py_DECREF(this);
2443 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002444 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002445
2446 Py_DECREF(self->last);
2447 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002448 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002449
2450 if (self->start_event_obj) {
2451 PyObject* res;
2452 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002453 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002454 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002455 PyList_Append(self->events, res);
2456 Py_DECREF(res);
2457 } else
2458 PyErr_Clear(); /* FIXME: propagate error */
2459 }
2460
2461 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002462
2463 error:
2464 Py_DECREF(node);
2465 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002466}
2467
2468LOCAL(PyObject*)
2469treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2470{
2471 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002472 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002473 /* ignore calls to data before the first call to start */
2474 Py_RETURN_NONE;
2475 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002476 /* store the first item as is */
2477 Py_INCREF(data); self->data = data;
2478 } else {
2479 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002480 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2481 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002482 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002483 /* expat often generates single character data sections; handle
2484 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002485 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2486 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002487 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002488 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002489 } else if (PyList_CheckExact(self->data)) {
2490 if (PyList_Append(self->data, data) < 0)
2491 return NULL;
2492 } else {
2493 PyObject* list = PyList_New(2);
2494 if (!list)
2495 return NULL;
2496 PyList_SET_ITEM(list, 0, self->data);
2497 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2498 self->data = list;
2499 }
2500 }
2501
2502 Py_RETURN_NONE;
2503}
2504
2505LOCAL(PyObject*)
2506treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2507{
2508 PyObject* item;
2509
2510 if (self->data) {
2511 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002512 if (treebuilder_set_element_text(self->last, self->data))
2513 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002514 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002515 if (treebuilder_set_element_tail(self->last, self->data))
2516 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002517 }
2518 self->data = NULL;
2519 }
2520
2521 if (self->index == 0) {
2522 PyErr_SetString(
2523 PyExc_IndexError,
2524 "pop from empty stack"
2525 );
2526 return NULL;
2527 }
2528
2529 self->index--;
2530
2531 item = PyList_GET_ITEM(self->stack, self->index);
2532 Py_INCREF(item);
2533
2534 Py_DECREF(self->last);
2535
Antoine Pitrouee329312012-10-04 19:53:29 +02002536 self->last = self->this;
2537 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002538
2539 if (self->end_event_obj) {
2540 PyObject* res;
2541 PyObject* action = self->end_event_obj;
2542 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002543 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002545 PyList_Append(self->events, res);
2546 Py_DECREF(res);
2547 } else
2548 PyErr_Clear(); /* FIXME: propagate error */
2549 }
2550
2551 Py_INCREF(self->last);
2552 return (PyObject*) self->last;
2553}
2554
2555LOCAL(void)
2556treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002557 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002558{
2559 PyObject* res;
2560 PyObject* action;
2561 PyObject* parcel;
2562
2563 if (!self->events)
2564 return;
2565
2566 if (start) {
2567 if (!self->start_ns_event_obj)
2568 return;
2569 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002570 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002571 if (!parcel)
2572 return;
2573 Py_INCREF(action);
2574 } else {
2575 if (!self->end_ns_event_obj)
2576 return;
2577 action = self->end_ns_event_obj;
2578 Py_INCREF(action);
2579 parcel = Py_None;
2580 Py_INCREF(parcel);
2581 }
2582
2583 res = PyTuple_New(2);
2584
2585 if (res) {
2586 PyTuple_SET_ITEM(res, 0, action);
2587 PyTuple_SET_ITEM(res, 1, parcel);
2588 PyList_Append(self->events, res);
2589 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002590 }
2591 else {
2592 Py_DECREF(action);
2593 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002594 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002595 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002596}
2597
2598/* -------------------------------------------------------------------- */
2599/* methods (in alphabetical order) */
2600
2601static PyObject*
2602treebuilder_data(TreeBuilderObject* self, PyObject* args)
2603{
2604 PyObject* data;
2605 if (!PyArg_ParseTuple(args, "O:data", &data))
2606 return NULL;
2607
2608 return treebuilder_handle_data(self, data);
2609}
2610
2611static PyObject*
2612treebuilder_end(TreeBuilderObject* self, PyObject* args)
2613{
2614 PyObject* tag;
2615 if (!PyArg_ParseTuple(args, "O:end", &tag))
2616 return NULL;
2617
2618 return treebuilder_handle_end(self, tag);
2619}
2620
2621LOCAL(PyObject*)
2622treebuilder_done(TreeBuilderObject* self)
2623{
2624 PyObject* res;
2625
2626 /* FIXME: check stack size? */
2627
2628 if (self->root)
2629 res = self->root;
2630 else
2631 res = Py_None;
2632
2633 Py_INCREF(res);
2634 return res;
2635}
2636
2637static PyObject*
2638treebuilder_close(TreeBuilderObject* self, PyObject* args)
2639{
2640 if (!PyArg_ParseTuple(args, ":close"))
2641 return NULL;
2642
2643 return treebuilder_done(self);
2644}
2645
2646static PyObject*
2647treebuilder_start(TreeBuilderObject* self, PyObject* args)
2648{
2649 PyObject* tag;
2650 PyObject* attrib = Py_None;
2651 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2652 return NULL;
2653
2654 return treebuilder_handle_start(self, tag, attrib);
2655}
2656
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002657static PyMethodDef treebuilder_methods[] = {
2658 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2659 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2660 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002661 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2662 {NULL, NULL}
2663};
2664
Neal Norwitz227b5332006-03-22 09:28:35 +00002665static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002666 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002667 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002668 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002669 (destructor)treebuilder_dealloc, /* tp_dealloc */
2670 0, /* tp_print */
2671 0, /* tp_getattr */
2672 0, /* tp_setattr */
2673 0, /* tp_reserved */
2674 0, /* tp_repr */
2675 0, /* tp_as_number */
2676 0, /* tp_as_sequence */
2677 0, /* tp_as_mapping */
2678 0, /* tp_hash */
2679 0, /* tp_call */
2680 0, /* tp_str */
2681 0, /* tp_getattro */
2682 0, /* tp_setattro */
2683 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002684 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2685 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002686 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002687 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2688 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002689 0, /* tp_richcompare */
2690 0, /* tp_weaklistoffset */
2691 0, /* tp_iter */
2692 0, /* tp_iternext */
2693 treebuilder_methods, /* tp_methods */
2694 0, /* tp_members */
2695 0, /* tp_getset */
2696 0, /* tp_base */
2697 0, /* tp_dict */
2698 0, /* tp_descr_get */
2699 0, /* tp_descr_set */
2700 0, /* tp_dictoffset */
2701 (initproc)treebuilder_init, /* tp_init */
2702 PyType_GenericAlloc, /* tp_alloc */
2703 treebuilder_new, /* tp_new */
2704 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002705};
2706
2707/* ==================================================================== */
2708/* the expat interface */
2709
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002712
2713/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2714 * cached globally without being in per-module state.
2715 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002716static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002717#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718
Eli Bendersky52467b12012-06-01 07:13:08 +03002719static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2720 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2721
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722typedef struct {
2723 PyObject_HEAD
2724
2725 XML_Parser parser;
2726
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002727 PyObject *target;
2728 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002729
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002730 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002732 PyObject *handle_start;
2733 PyObject *handle_data;
2734 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002735
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002736 PyObject *handle_comment;
2737 PyObject *handle_pi;
2738 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002740 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002741
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742} XMLParserObject;
2743
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002744#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2745
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746/* helpers */
2747
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002748LOCAL(PyObject*)
2749makeuniversal(XMLParserObject* self, const char* string)
2750{
2751 /* convert a UTF-8 tag/attribute name from the expat parser
2752 to a universal name string */
2753
Antoine Pitrouc1948842012-10-01 23:40:37 +02002754 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002755 PyObject* key;
2756 PyObject* value;
2757
2758 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002759 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002760 if (!key)
2761 return NULL;
2762
2763 value = PyDict_GetItem(self->names, key);
2764
2765 if (value) {
2766 Py_INCREF(value);
2767 } else {
2768 /* new name. convert to universal name, and decode as
2769 necessary */
2770
2771 PyObject* tag;
2772 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002773 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002774
2775 /* look for namespace separator */
2776 for (i = 0; i < size; i++)
2777 if (string[i] == '}')
2778 break;
2779 if (i != size) {
2780 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002781 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002782 if (tag == NULL) {
2783 Py_DECREF(key);
2784 return NULL;
2785 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002786 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002787 p[0] = '{';
2788 memcpy(p+1, string, size);
2789 size++;
2790 } else {
2791 /* plain name; use key as tag */
2792 Py_INCREF(key);
2793 tag = key;
2794 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002795
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002796 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002797 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002798 value = PyUnicode_DecodeUTF8(p, size, "strict");
2799 Py_DECREF(tag);
2800 if (!value) {
2801 Py_DECREF(key);
2802 return NULL;
2803 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002804
2805 /* add to names dictionary */
2806 if (PyDict_SetItem(self->names, key, value) < 0) {
2807 Py_DECREF(key);
2808 Py_DECREF(value);
2809 return NULL;
2810 }
2811 }
2812
2813 Py_DECREF(key);
2814 return value;
2815}
2816
Eli Bendersky5b77d812012-03-16 08:20:05 +02002817/* Set the ParseError exception with the given parameters.
2818 * If message is not NULL, it's used as the error string. Otherwise, the
2819 * message string is the default for the given error_code.
2820*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002821static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002822expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002823{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002824 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002825 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002826
Victor Stinner499dfcf2011-03-21 13:26:24 +01002827 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002828 message ? message : EXPAT(ErrorString)(error_code),
2829 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002830 if (errmsg == NULL)
2831 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002832
Eli Bendersky532d03e2013-08-10 08:00:39 -07002833 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002834 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002835 if (!error)
2836 return;
2837
Eli Bendersky5b77d812012-03-16 08:20:05 +02002838 /* Add code and position attributes */
2839 code = PyLong_FromLong((long)error_code);
2840 if (!code) {
2841 Py_DECREF(error);
2842 return;
2843 }
2844 if (PyObject_SetAttrString(error, "code", code) == -1) {
2845 Py_DECREF(error);
2846 Py_DECREF(code);
2847 return;
2848 }
2849 Py_DECREF(code);
2850
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002851 position = Py_BuildValue("(ii)", line, column);
2852 if (!position) {
2853 Py_DECREF(error);
2854 return;
2855 }
2856 if (PyObject_SetAttrString(error, "position", position) == -1) {
2857 Py_DECREF(error);
2858 Py_DECREF(position);
2859 return;
2860 }
2861 Py_DECREF(position);
2862
Eli Bendersky532d03e2013-08-10 08:00:39 -07002863 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002864 Py_DECREF(error);
2865}
2866
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002867/* -------------------------------------------------------------------- */
2868/* handlers */
2869
2870static void
2871expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2872 int data_len)
2873{
2874 PyObject* key;
2875 PyObject* value;
2876 PyObject* res;
2877
2878 if (data_len < 2 || data_in[0] != '&')
2879 return;
2880
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002881 if (PyErr_Occurred())
2882 return;
2883
Neal Norwitz0269b912007-08-08 06:56:02 +00002884 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002885 if (!key)
2886 return;
2887
2888 value = PyDict_GetItem(self->entity, key);
2889
2890 if (value) {
2891 if (TreeBuilder_CheckExact(self->target))
2892 res = treebuilder_handle_data(
2893 (TreeBuilderObject*) self->target, value
2894 );
2895 else if (self->handle_data)
2896 res = PyObject_CallFunction(self->handle_data, "O", value);
2897 else
2898 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002899 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002900 } else if (!PyErr_Occurred()) {
2901 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002902 char message[128] = "undefined entity ";
2903 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002904 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002905 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002906 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002907 EXPAT(GetErrorColumnNumber)(self->parser),
2908 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002909 );
2910 }
2911
2912 Py_DECREF(key);
2913}
2914
2915static void
2916expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2917 const XML_Char **attrib_in)
2918{
2919 PyObject* res;
2920 PyObject* tag;
2921 PyObject* attrib;
2922 int ok;
2923
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002924 if (PyErr_Occurred())
2925 return;
2926
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002927 /* tag name */
2928 tag = makeuniversal(self, tag_in);
2929 if (!tag)
2930 return; /* parser will look for errors */
2931
2932 /* attributes */
2933 if (attrib_in[0]) {
2934 attrib = PyDict_New();
2935 if (!attrib)
2936 return;
2937 while (attrib_in[0] && attrib_in[1]) {
2938 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002939 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002940 if (!key || !value) {
2941 Py_XDECREF(value);
2942 Py_XDECREF(key);
2943 Py_DECREF(attrib);
2944 return;
2945 }
2946 ok = PyDict_SetItem(attrib, key, value);
2947 Py_DECREF(value);
2948 Py_DECREF(key);
2949 if (ok < 0) {
2950 Py_DECREF(attrib);
2951 return;
2952 }
2953 attrib_in += 2;
2954 }
2955 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002956 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002957 attrib = PyDict_New();
2958 if (!attrib)
2959 return;
2960 }
2961
2962 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002963 /* shortcut */
2964 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2965 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002966 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002967 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002968 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002969 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970 res = NULL;
2971
2972 Py_DECREF(tag);
2973 Py_DECREF(attrib);
2974
2975 Py_XDECREF(res);
2976}
2977
2978static void
2979expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2980 int data_len)
2981{
2982 PyObject* data;
2983 PyObject* res;
2984
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002985 if (PyErr_Occurred())
2986 return;
2987
Neal Norwitz0269b912007-08-08 06:56:02 +00002988 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002989 if (!data)
2990 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002991
2992 if (TreeBuilder_CheckExact(self->target))
2993 /* shortcut */
2994 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2995 else if (self->handle_data)
2996 res = PyObject_CallFunction(self->handle_data, "O", data);
2997 else
2998 res = NULL;
2999
3000 Py_DECREF(data);
3001
3002 Py_XDECREF(res);
3003}
3004
3005static void
3006expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3007{
3008 PyObject* tag;
3009 PyObject* res = NULL;
3010
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003011 if (PyErr_Occurred())
3012 return;
3013
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003014 if (TreeBuilder_CheckExact(self->target))
3015 /* shortcut */
3016 /* the standard tree builder doesn't look at the end tag */
3017 res = treebuilder_handle_end(
3018 (TreeBuilderObject*) self->target, Py_None
3019 );
3020 else if (self->handle_end) {
3021 tag = makeuniversal(self, tag_in);
3022 if (tag) {
3023 res = PyObject_CallFunction(self->handle_end, "O", tag);
3024 Py_DECREF(tag);
3025 }
3026 }
3027
3028 Py_XDECREF(res);
3029}
3030
3031static void
3032expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3033 const XML_Char *uri)
3034{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003035 PyObject* sprefix = NULL;
3036 PyObject* suri = NULL;
3037
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003038 if (PyErr_Occurred())
3039 return;
3040
Eli Bendersky5dd40e52013-11-28 06:31:58 -08003041 if (uri)
Eli Bendersky4b795182013-11-28 06:33:21 -08003042 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
Eli Bendersky5dd40e52013-11-28 06:31:58 -08003043 else
Eli Bendersky4b795182013-11-28 06:33:21 -08003044 suri = PyUnicode_FromString("");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003045 if (!suri)
3046 return;
3047
3048 if (prefix)
3049 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3050 else
3051 sprefix = PyUnicode_FromString("");
3052 if (!sprefix) {
3053 Py_DECREF(suri);
3054 return;
3055 }
3056
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003057 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003058 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003059 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003060
3061 Py_DECREF(sprefix);
3062 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003063}
3064
3065static void
3066expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3067{
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003068 if (PyErr_Occurred())
3069 return;
3070
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003071 treebuilder_handle_namespace(
3072 (TreeBuilderObject*) self->target, 0, NULL, NULL
3073 );
3074}
3075
3076static void
3077expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3078{
3079 PyObject* comment;
3080 PyObject* res;
3081
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003082 if (PyErr_Occurred())
3083 return;
3084
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003085 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003086 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003087 if (comment) {
3088 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3089 Py_XDECREF(res);
3090 Py_DECREF(comment);
3091 }
3092 }
3093}
3094
Eli Bendersky45839902013-01-13 05:14:47 -08003095static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003096expat_start_doctype_handler(XMLParserObject *self,
3097 const XML_Char *doctype_name,
3098 const XML_Char *sysid,
3099 const XML_Char *pubid,
3100 int has_internal_subset)
3101{
3102 PyObject *self_pyobj = (PyObject *)self;
3103 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3104 PyObject *parser_doctype = NULL;
3105 PyObject *res = NULL;
3106
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003107 if (PyErr_Occurred())
3108 return;
3109
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003110 doctype_name_obj = makeuniversal(self, doctype_name);
3111 if (!doctype_name_obj)
3112 return;
3113
3114 if (sysid) {
3115 sysid_obj = makeuniversal(self, sysid);
3116 if (!sysid_obj) {
3117 Py_DECREF(doctype_name_obj);
3118 return;
3119 }
3120 } else {
3121 Py_INCREF(Py_None);
3122 sysid_obj = Py_None;
3123 }
3124
3125 if (pubid) {
3126 pubid_obj = makeuniversal(self, pubid);
3127 if (!pubid_obj) {
3128 Py_DECREF(doctype_name_obj);
3129 Py_DECREF(sysid_obj);
3130 return;
3131 }
3132 } else {
3133 Py_INCREF(Py_None);
3134 pubid_obj = Py_None;
3135 }
3136
3137 /* If the target has a handler for doctype, call it. */
3138 if (self->handle_doctype) {
3139 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3140 doctype_name_obj, pubid_obj, sysid_obj);
3141 Py_CLEAR(res);
3142 }
3143
3144 /* Now see if the parser itself has a doctype method. If yes and it's
3145 * a subclass, call it but warn about deprecation. If it's not a subclass
3146 * (i.e. vanilla XMLParser), do nothing.
3147 */
3148 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3149 if (parser_doctype) {
3150 if (!XMLParser_CheckExact(self_pyobj)) {
3151 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3152 "This method of XMLParser is deprecated. Define"
3153 " doctype() method on the TreeBuilder target.",
3154 1) < 0) {
3155 goto clear;
3156 }
3157 res = PyObject_CallFunction(parser_doctype, "OOO",
3158 doctype_name_obj, pubid_obj, sysid_obj);
3159 Py_CLEAR(res);
3160 }
3161 }
3162
3163clear:
3164 Py_XDECREF(parser_doctype);
3165 Py_DECREF(doctype_name_obj);
3166 Py_DECREF(pubid_obj);
3167 Py_DECREF(sysid_obj);
3168}
3169
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003170static void
3171expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3172 const XML_Char* data_in)
3173{
3174 PyObject* target;
3175 PyObject* data;
3176 PyObject* res;
3177
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003178 if (PyErr_Occurred())
3179 return;
3180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003181 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003182 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3183 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003184 if (target && data) {
3185 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3186 Py_XDECREF(res);
3187 Py_DECREF(data);
3188 Py_DECREF(target);
3189 } else {
3190 Py_XDECREF(data);
3191 Py_XDECREF(target);
3192 }
3193 }
3194}
3195
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003196/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003197
Eli Bendersky52467b12012-06-01 07:13:08 +03003198static PyObject *
3199xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003200{
Eli Bendersky52467b12012-06-01 07:13:08 +03003201 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3202 if (self) {
3203 self->parser = NULL;
3204 self->target = self->entity = self->names = NULL;
3205 self->handle_start = self->handle_data = self->handle_end = NULL;
3206 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003207 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003208 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003209 return (PyObject *)self;
3210}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003211
Eli Bendersky52467b12012-06-01 07:13:08 +03003212static int
3213xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3214{
3215 XMLParserObject *self_xp = (XMLParserObject *)self;
3216 PyObject *target = NULL, *html = NULL;
3217 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003218 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003219
Eli Bendersky52467b12012-06-01 07:13:08 +03003220 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3221 &html, &target, &encoding)) {
3222 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003223 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003224
Eli Bendersky52467b12012-06-01 07:13:08 +03003225 self_xp->entity = PyDict_New();
3226 if (!self_xp->entity)
3227 return -1;
3228
3229 self_xp->names = PyDict_New();
3230 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003231 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003232 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003233 }
3234
Eli Bendersky52467b12012-06-01 07:13:08 +03003235 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3236 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003237 Py_CLEAR(self_xp->entity);
3238 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003239 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003240 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003241 }
3242
Eli Bendersky52467b12012-06-01 07:13:08 +03003243 if (target) {
3244 Py_INCREF(target);
3245 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003246 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003247 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003248 Py_CLEAR(self_xp->entity);
3249 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003250 EXPAT(ParserFree)(self_xp->parser);
3251 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003252 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003253 }
3254 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003255
Eli Bendersky52467b12012-06-01 07:13:08 +03003256 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3257 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3258 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3259 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3260 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3261 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003262 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003263
3264 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003265
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003266 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003267 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003268 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003269 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003270 (XML_StartElementHandler) expat_start_handler,
3271 (XML_EndElementHandler) expat_end_handler
3272 );
3273 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003274 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003275 (XML_DefaultHandler) expat_default_handler
3276 );
3277 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003278 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003279 (XML_CharacterDataHandler) expat_data_handler
3280 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003281 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003283 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284 (XML_CommentHandler) expat_comment_handler
3285 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003286 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003287 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003288 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003289 (XML_ProcessingInstructionHandler) expat_pi_handler
3290 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003291 EXPAT(SetStartDoctypeDeclHandler)(
3292 self_xp->parser,
3293 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3294 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003295 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003296 self_xp->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003297 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003298 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003299
Eli Bendersky52467b12012-06-01 07:13:08 +03003300 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003301}
3302
Eli Bendersky52467b12012-06-01 07:13:08 +03003303static int
3304xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3305{
3306 Py_VISIT(self->handle_close);
3307 Py_VISIT(self->handle_pi);
3308 Py_VISIT(self->handle_comment);
3309 Py_VISIT(self->handle_end);
3310 Py_VISIT(self->handle_data);
3311 Py_VISIT(self->handle_start);
3312
3313 Py_VISIT(self->target);
3314 Py_VISIT(self->entity);
3315 Py_VISIT(self->names);
3316
3317 return 0;
3318}
3319
3320static int
3321xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003322{
3323 EXPAT(ParserFree)(self->parser);
3324
Antoine Pitrouc1948842012-10-01 23:40:37 +02003325 Py_CLEAR(self->handle_close);
3326 Py_CLEAR(self->handle_pi);
3327 Py_CLEAR(self->handle_comment);
3328 Py_CLEAR(self->handle_end);
3329 Py_CLEAR(self->handle_data);
3330 Py_CLEAR(self->handle_start);
3331 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003332
Antoine Pitrouc1948842012-10-01 23:40:37 +02003333 Py_CLEAR(self->target);
3334 Py_CLEAR(self->entity);
3335 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003336
Eli Bendersky52467b12012-06-01 07:13:08 +03003337 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003338}
3339
Eli Bendersky52467b12012-06-01 07:13:08 +03003340static void
3341xmlparser_dealloc(XMLParserObject* self)
3342{
3343 PyObject_GC_UnTrack(self);
3344 xmlparser_gc_clear(self);
3345 Py_TYPE(self)->tp_free((PyObject *)self);
3346}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003347
3348LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003349expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003350{
3351 int ok;
3352
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003353 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003354 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3355
3356 if (PyErr_Occurred())
3357 return NULL;
3358
3359 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003360 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003361 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003362 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003363 EXPAT(GetErrorColumnNumber)(self->parser),
3364 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003365 );
3366 return NULL;
3367 }
3368
3369 Py_RETURN_NONE;
3370}
3371
3372static PyObject*
3373xmlparser_close(XMLParserObject* self, PyObject* args)
3374{
3375 /* end feeding data to parser */
3376
3377 PyObject* res;
3378 if (!PyArg_ParseTuple(args, ":close"))
3379 return NULL;
3380
3381 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003382 if (!res)
3383 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003384
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003385 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003386 Py_DECREF(res);
3387 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003388 }
3389 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003390 Py_DECREF(res);
3391 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003392 }
3393 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003394 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003395 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003396}
3397
3398static PyObject*
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003399xmlparser_feed(XMLParserObject* self, PyObject* arg)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003400{
3401 /* feed data to parser */
3402
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003403 if (PyUnicode_Check(arg)) {
3404 Py_ssize_t data_len;
3405 const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len);
3406 if (data == NULL)
3407 return NULL;
3408 if (data_len > INT_MAX) {
3409 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3410 return NULL;
3411 }
3412 /* Explicitly set UTF-8 encoding. Return code ignored. */
3413 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3414 return expat_parse(self, data, (int)data_len, 0);
3415 }
3416 else {
3417 Py_buffer view;
3418 PyObject *res;
3419 if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0)
3420 return NULL;
3421 if (view.len > INT_MAX) {
3422 PyBuffer_Release(&view);
3423 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3424 return NULL;
3425 }
3426 res = expat_parse(self, view.buf, (int)view.len, 0);
3427 PyBuffer_Release(&view);
3428 return res;
3429 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003430}
3431
3432static PyObject*
Eli Benderskya3699232013-05-19 18:47:23 -07003433xmlparser_parse_whole(XMLParserObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003434{
Eli Benderskya3699232013-05-19 18:47:23 -07003435 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003436 PyObject* reader;
3437 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003438 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003439 PyObject* res;
3440
3441 PyObject* fileobj;
3442 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3443 return NULL;
3444
3445 reader = PyObject_GetAttrString(fileobj, "read");
3446 if (!reader)
3447 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003448
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003449 /* read from open file object */
3450 for (;;) {
3451
3452 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3453
3454 if (!buffer) {
3455 /* read failed (e.g. due to KeyboardInterrupt) */
3456 Py_DECREF(reader);
3457 return NULL;
3458 }
3459
Eli Benderskyf996e772012-03-16 05:53:30 +02003460 if (PyUnicode_CheckExact(buffer)) {
3461 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003462 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003463 Py_DECREF(buffer);
3464 break;
3465 }
3466 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003467 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003468 if (!temp) {
3469 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003470 Py_DECREF(reader);
3471 return NULL;
3472 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003473 buffer = temp;
3474 }
3475 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003476 Py_DECREF(buffer);
3477 break;
3478 }
3479
3480 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003481 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003482 );
3483
3484 Py_DECREF(buffer);
3485
3486 if (!res) {
3487 Py_DECREF(reader);
3488 return NULL;
3489 }
3490 Py_DECREF(res);
3491
3492 }
3493
3494 Py_DECREF(reader);
3495
3496 res = expat_parse(self, "", 0, 1);
3497
3498 if (res && TreeBuilder_CheckExact(self->target)) {
3499 Py_DECREF(res);
3500 return treebuilder_done((TreeBuilderObject*) self->target);
3501 }
3502
3503 return res;
3504}
3505
3506static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003507xmlparser_doctype(XMLParserObject *self, PyObject *args)
3508{
3509 Py_RETURN_NONE;
3510}
3511
3512static PyObject*
3513xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003514{
3515 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003516 Py_ssize_t i, seqlen;
3517 TreeBuilderObject *target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003518
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003519 PyObject *events_queue;
3520 PyObject *events_to_report = Py_None;
3521 PyObject *events_seq;
3522 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events_queue,
3523 &events_to_report))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003524 return NULL;
3525
3526 if (!TreeBuilder_CheckExact(self->target)) {
3527 PyErr_SetString(
3528 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003529 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003530 "targets"
3531 );
3532 return NULL;
3533 }
3534
3535 target = (TreeBuilderObject*) self->target;
3536
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003537 Py_INCREF(events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003538 Py_XDECREF(target->events);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003539 target->events = events_queue;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003540
3541 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003542 Py_CLEAR(target->start_event_obj);
3543 Py_CLEAR(target->end_event_obj);
3544 Py_CLEAR(target->start_ns_event_obj);
3545 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003546
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003547 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003548 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003549 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003550 Py_RETURN_NONE;
3551 }
3552
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003553 if (!(events_seq = PySequence_Fast(events_to_report,
3554 "events must be a sequence"))) {
3555 return NULL;
3556 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003557
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003558 seqlen = PySequence_Size(events_seq);
3559 for (i = 0; i < seqlen; ++i) {
3560 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3561 char *event_name = NULL;
3562 if (PyUnicode_Check(event_name_obj)) {
3563 event_name = _PyUnicode_AsString(event_name_obj);
3564 } else if (PyBytes_Check(event_name_obj)) {
3565 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003566 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003567
3568 if (event_name == NULL) {
3569 Py_DECREF(events_seq);
3570 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3571 return NULL;
3572 } else if (strcmp(event_name, "start") == 0) {
3573 Py_INCREF(event_name_obj);
3574 target->start_event_obj = event_name_obj;
3575 } else if (strcmp(event_name, "end") == 0) {
3576 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003577 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003578 target->end_event_obj = event_name_obj;
3579 } else if (strcmp(event_name, "start-ns") == 0) {
3580 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003581 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003582 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003583 EXPAT(SetNamespaceDeclHandler)(
3584 self->parser,
3585 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3586 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3587 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003588 } else if (strcmp(event_name, "end-ns") == 0) {
3589 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003590 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003591 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003592 EXPAT(SetNamespaceDeclHandler)(
3593 self->parser,
3594 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3595 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3596 );
3597 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003598 Py_DECREF(events_seq);
3599 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003600 return NULL;
3601 }
3602 }
3603
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003604 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003605 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003606}
3607
3608static PyMethodDef xmlparser_methods[] = {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003609 {"feed", (PyCFunction) xmlparser_feed, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003610 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
Eli Benderskya3699232013-05-19 18:47:23 -07003611 {"_parse_whole", (PyCFunction) xmlparser_parse_whole, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003612 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003613 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003614 {NULL, NULL}
3615};
3616
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003617static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003618xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003619{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003620 if (PyUnicode_Check(nameobj)) {
3621 PyObject* res;
3622 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3623 res = self->entity;
3624 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3625 res = self->target;
3626 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3627 return PyUnicode_FromFormat(
3628 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003629 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003630 }
3631 else
3632 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003633
Alexander Belopolskye239d232010-12-08 23:31:48 +00003634 Py_INCREF(res);
3635 return res;
3636 }
3637 generic:
3638 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003639}
3640
Neal Norwitz227b5332006-03-22 09:28:35 +00003641static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003642 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003643 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003644 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003645 (destructor)xmlparser_dealloc, /* tp_dealloc */
3646 0, /* tp_print */
3647 0, /* tp_getattr */
3648 0, /* tp_setattr */
3649 0, /* tp_reserved */
3650 0, /* tp_repr */
3651 0, /* tp_as_number */
3652 0, /* tp_as_sequence */
3653 0, /* tp_as_mapping */
3654 0, /* tp_hash */
3655 0, /* tp_call */
3656 0, /* tp_str */
3657 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3658 0, /* tp_setattro */
3659 0, /* tp_as_buffer */
3660 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3661 /* tp_flags */
3662 0, /* tp_doc */
3663 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3664 (inquiry)xmlparser_gc_clear, /* tp_clear */
3665 0, /* tp_richcompare */
3666 0, /* tp_weaklistoffset */
3667 0, /* tp_iter */
3668 0, /* tp_iternext */
3669 xmlparser_methods, /* tp_methods */
3670 0, /* tp_members */
3671 0, /* tp_getset */
3672 0, /* tp_base */
3673 0, /* tp_dict */
3674 0, /* tp_descr_get */
3675 0, /* tp_descr_set */
3676 0, /* tp_dictoffset */
3677 (initproc)xmlparser_init, /* tp_init */
3678 PyType_GenericAlloc, /* tp_alloc */
3679 xmlparser_new, /* tp_new */
3680 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681};
3682
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003683/* ==================================================================== */
3684/* python module interface */
3685
3686static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003687 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003688 {NULL, NULL}
3689};
3690
Martin v. Löwis1a214512008-06-11 05:26:20 +00003691
Eli Bendersky532d03e2013-08-10 08:00:39 -07003692static struct PyModuleDef elementtreemodule = {
3693 PyModuleDef_HEAD_INIT,
3694 "_elementtree",
3695 NULL,
3696 sizeof(elementtreestate),
3697 _functions,
3698 NULL,
3699 elementtree_traverse,
3700 elementtree_clear,
3701 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003702};
3703
Neal Norwitzf6657e62006-12-28 04:47:50 +00003704PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003705PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003706{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003707 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003708 elementtreestate *st;
3709
3710 m = PyState_FindModule(&elementtreemodule);
3711 if (m) {
3712 Py_INCREF(m);
3713 return m;
3714 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003715
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003716 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003717 if (PyType_Ready(&ElementIter_Type) < 0)
3718 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003719 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003720 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003721 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003722 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003723 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003724 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003725
Eli Bendersky532d03e2013-08-10 08:00:39 -07003726 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003727 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003728 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003729 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003730
Eli Bendersky828efde2012-04-05 05:40:58 +03003731 if (!(temp = PyImport_ImportModule("copy")))
3732 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003733 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003734 Py_XDECREF(temp);
3735
Eli Bendersky532d03e2013-08-10 08:00:39 -07003736 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003737 return NULL;
3738
Eli Bendersky20d41742012-06-01 09:48:37 +03003739 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003740 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3741 if (expat_capi) {
3742 /* check that it's usable */
3743 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3744 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3745 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3746 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003747 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003748 PyErr_SetString(PyExc_ImportError,
3749 "pyexpat version is incompatible");
3750 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003751 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003752 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003753 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003754 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003755
Eli Bendersky532d03e2013-08-10 08:00:39 -07003756 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003757 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003758 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003759 Py_INCREF(st->parseerror_obj);
3760 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003761
Eli Bendersky092af1f2012-03-04 07:14:03 +02003762 Py_INCREF((PyObject *)&Element_Type);
3763 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3764
Eli Bendersky58d548d2012-05-29 15:45:16 +03003765 Py_INCREF((PyObject *)&TreeBuilder_Type);
3766 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3767
Eli Bendersky52467b12012-06-01 07:13:08 +03003768 Py_INCREF((PyObject *)&XMLParser_Type);
3769 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003770
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003771 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003772}