blob: 46a1f4157cc7267fcc06cb6a5e8f46169d791473 [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
14#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030015#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016
/* -------------------------------------------------------------------- */
/* configuration */

/* Number of child pointers stored inline in an element's extra block;
   an element can hold this many children without extra memory
   allocations.

   For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes).

   Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
#define STATIC_CHILDREN 4

/* -------------------------------------------------------------------- */
35
/* Optional allocation tracing.  When the "#if 0" below is switched on,
   ALLOC and RELEASE keep a running byte count in 'memory' and print it
   together with the given comment; otherwise both expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += (size); \
        printf("%8d - %s\n", memory, (comment)); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= (size); \
        printf("%8d - %s\n", memory, (comment)); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
46
/* compiler tweaks: LOCAL(type) introduces a file-private definition of
   the given type; with MSVC it additionally asks for inlining and the
   __fastcall calling convention. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
53
/* The text and tail slots of an element store a 'join' flag in bit 0 of
   the object pointer.  All use of text and tail as object pointers must
   therefore be wrapped in JOIN_OBJ.  See the comments in the
   ElementObject definition for more info.

   JOIN_OBJ(p)        the object pointer, with the flag bit masked off
   JOIN_GET(p)        the flag bit of p (0 or 1)
   JOIN_SET(p, flag)  the object pointer of p with 'flag' OR-ed in */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061
Eli Benderskydd3661e2013-09-13 06:24:25 -070062/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
63 * reference since this function sets it to NULL.
64*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020065static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070066{
67 if (*p) {
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = NULL;
70 Py_DECREF(tmp);
71 }
72}
73
Ronald Oussoren138d0802013-07-19 11:11:25 +020074/* Types defined by this extension */
75static PyTypeObject Element_Type;
76static PyTypeObject ElementIter_Type;
77static PyTypeObject TreeBuilder_Type;
78static PyTypeObject XMLParser_Type;
79
80
Eli Bendersky532d03e2013-08-10 08:00:39 -070081/* Per-module state; PEP 3121 */
82typedef struct {
83 PyObject *parseerror_obj;
84 PyObject *deepcopy_obj;
85 PyObject *elementpath_obj;
86} elementtreestate;
87
88static struct PyModuleDef elementtreemodule;
89
90/* Given a module object (assumed to be _elementtree), get its per-module
91 * state.
92 */
93#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
94
95/* Find the module instance imported in the currently running sub-interpreter
96 * and get its state.
97 */
98#define ET_STATE_GLOBAL \
99 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
100
101static int
102elementtree_clear(PyObject *m)
103{
104 elementtreestate *st = ET_STATE(m);
105 Py_CLEAR(st->parseerror_obj);
106 Py_CLEAR(st->deepcopy_obj);
107 Py_CLEAR(st->elementpath_obj);
108 return 0;
109}
110
111static int
112elementtree_traverse(PyObject *m, visitproc visit, void *arg)
113{
114 elementtreestate *st = ET_STATE(m);
115 Py_VISIT(st->parseerror_obj);
116 Py_VISIT(st->deepcopy_obj);
117 Py_VISIT(st->elementpath_obj);
118 return 0;
119}
120
121static void
122elementtree_free(void *m)
123{
124 elementtree_clear((PyObject *)m);
125}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126
127/* helpers */
128
129LOCAL(PyObject*)
130deepcopy(PyObject* object, PyObject* memo)
131{
132 /* do a deep copy of the given object */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000133 PyObject* args;
134 PyObject* result;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700135 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136
Eli Bendersky532d03e2013-08-10 08:00:39 -0700137 if (!st->deepcopy_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138 PyErr_SetString(
139 PyExc_RuntimeError,
140 "deepcopy helper not found"
141 );
142 return NULL;
143 }
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000146 if (!args)
147 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700148 result = PyObject_CallObject(st->deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
153LOCAL(PyObject*)
154list_join(PyObject* list)
155{
156 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000158 PyObject* result;
159
Antoine Pitrouc1948842012-10-01 23:40:37 +0200160 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000161 if (!joiner)
162 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200163 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200165 if (result)
166 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000167 return result;
168}
169
Eli Bendersky48d358b2012-05-30 17:57:50 +0300170/* Is the given object an empty dictionary?
171*/
172static int
173is_empty_dict(PyObject *obj)
174{
175 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
176}
177
178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200180/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181
182typedef struct {
183
184 /* attributes (a dictionary object), or None if no attributes */
185 PyObject* attrib;
186
187 /* child elements */
188 int length; /* actual number of items */
189 int allocated; /* allocated items */
190
191 /* this either points to _children or to a malloced buffer */
192 PyObject* *children;
193
194 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100195
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000196} ElementObjectExtra;
197
198typedef struct {
199 PyObject_HEAD
200
201 /* element tag (a string). */
202 PyObject* tag;
203
204 /* text before first child. note that this is a tagged pointer;
205 use JOIN_OBJ to get the object pointer. the join flag is used
206 to distinguish lists created by the tree builder from lists
207 assigned to the attribute by application code; the former
208 should be joined before being returned to the user, the latter
209 should be left intact. */
210 PyObject* text;
211
212 /* text after this element, in parent. note that this is a tagged
213 pointer; use JOIN_OBJ to get the object pointer. */
214 PyObject* tail;
215
216 ElementObjectExtra* extra;
217
Eli Benderskyebf37a22012-04-03 22:02:37 +0300218 PyObject *weakreflist; /* For tp_weaklistoffset */
219
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000220} ElementObject;
221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222
Christian Heimes90aa7642007-12-19 02:45:37 +0000223#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
225/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200226/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227
228LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
231 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200232 if (!self->extra) {
233 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000234 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200235 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000236
237 if (!attrib)
238 attrib = Py_None;
239
240 Py_INCREF(attrib);
241 self->extra->attrib = attrib;
242
243 self->extra->length = 0;
244 self->extra->allocated = STATIC_CHILDREN;
245 self->extra->children = self->extra->_children;
246
247 return 0;
248}
249
250LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200251dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000252{
Eli Bendersky08b85292012-04-04 15:55:07 +0300253 ElementObjectExtra *myextra;
254 int i;
255
Eli Benderskyebf37a22012-04-03 22:02:37 +0300256 if (!self->extra)
257 return;
258
259 /* Avoid DECREFs calling into this code again (cycles, etc.)
260 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300261 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300262 self->extra = NULL;
263
264 Py_DECREF(myextra->attrib);
265
Eli Benderskyebf37a22012-04-03 22:02:37 +0300266 for (i = 0; i < myextra->length; i++)
267 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268
Eli Benderskyebf37a22012-04-03 22:02:37 +0300269 if (myextra->children != myextra->_children)
270 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271
Eli Benderskyebf37a22012-04-03 22:02:37 +0300272 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273}
274
Eli Bendersky092af1f2012-03-04 07:14:03 +0200275/* Convenience internal function to create new Element objects with the given
276 * tag and attributes.
277*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000278LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200279create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280{
281 ElementObject* self;
282
Eli Bendersky0192ba32012-03-30 16:38:33 +0300283 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000284 if (self == NULL)
285 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 self->extra = NULL;
287
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 Py_INCREF(tag);
289 self->tag = tag;
290
291 Py_INCREF(Py_None);
292 self->text = Py_None;
293
294 Py_INCREF(Py_None);
295 self->tail = Py_None;
296
Eli Benderskyebf37a22012-04-03 22:02:37 +0300297 self->weakreflist = NULL;
298
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200299 ALLOC(sizeof(ElementObject), "create element");
300 PyObject_GC_Track(self);
301
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200302 if (attrib != Py_None && !is_empty_dict(attrib)) {
303 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200304 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200305 return NULL;
306 }
307 }
308
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000309 return (PyObject*) self;
310}
311
Eli Bendersky092af1f2012-03-04 07:14:03 +0200312static PyObject *
313element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
314{
315 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
316 if (e != NULL) {
317 Py_INCREF(Py_None);
318 e->tag = Py_None;
319
320 Py_INCREF(Py_None);
321 e->text = Py_None;
322
323 Py_INCREF(Py_None);
324 e->tail = Py_None;
325
326 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300327 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200328 }
329 return (PyObject *)e;
330}
331
Eli Bendersky737b1732012-05-29 06:02:56 +0300332/* Helper function for extracting the attrib dictionary from a keywords dict.
333 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800334 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300335 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700336 *
337 * Return a dictionary with the content of kwds merged into the content of
338 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300339 */
340static PyObject*
341get_attrib_from_keywords(PyObject *kwds)
342{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700343 PyObject *attrib_str = PyUnicode_FromString("attrib");
344 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300345
346 if (attrib) {
347 /* If attrib was found in kwds, copy its value and remove it from
348 * kwds
349 */
350 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700351 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300352 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
353 Py_TYPE(attrib)->tp_name);
354 return NULL;
355 }
356 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700357 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300358 } else {
359 attrib = PyDict_New();
360 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700361
362 Py_DECREF(attrib_str);
363
364 /* attrib can be NULL if PyDict_New failed */
365 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200366 if (PyDict_Update(attrib, kwds) < 0)
367 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300368 return attrib;
369}
370
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371static int
372element_init(PyObject *self, PyObject *args, PyObject *kwds)
373{
374 PyObject *tag;
375 PyObject *tmp;
376 PyObject *attrib = NULL;
377 ElementObject *self_elem;
378
379 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
380 return -1;
381
Eli Bendersky737b1732012-05-29 06:02:56 +0300382 if (attrib) {
383 /* attrib passed as positional arg */
384 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200385 if (!attrib)
386 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300387 if (kwds) {
388 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200389 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300390 return -1;
391 }
392 }
393 } else if (kwds) {
394 /* have keywords args */
395 attrib = get_attrib_from_keywords(kwds);
396 if (!attrib)
397 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200398 }
399
400 self_elem = (ElementObject *)self;
401
Antoine Pitrouc1948842012-10-01 23:40:37 +0200402 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 return -1;
406 }
407 }
408
Eli Bendersky48d358b2012-05-30 17:57:50 +0300409 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200410 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200411
412 /* Replace the objects already pointed to by tag, text and tail. */
413 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200414 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200415 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416 Py_DECREF(tmp);
417
418 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200419 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200420 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 Py_DECREF(JOIN_OBJ(tmp));
422
423 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(JOIN_OBJ(tmp));
427
428 return 0;
429}
430
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000431LOCAL(int)
432element_resize(ElementObject* self, int extra)
433{
434 int size;
435 PyObject* *children;
436
437 /* make sure self->children can hold the given number of extra
438 elements. set an exception and return -1 if allocation failed */
439
Victor Stinner5f0af232013-07-11 23:01:36 +0200440 if (!self->extra) {
441 if (create_extra(self, NULL) < 0)
442 return -1;
443 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444
445 size = self->extra->length + extra;
446
447 if (size > self->extra->allocated) {
448 /* use Python 2.4's list growth strategy */
449 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000450 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100451 * which needs at least 4 bytes.
452 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000453 * be safe.
454 */
455 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000457 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100458 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000459 * false alarm always assume at least one child to be safe.
460 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000461 children = PyObject_Realloc(self->extra->children,
462 size * sizeof(PyObject*));
463 if (!children)
464 goto nomemory;
465 } else {
466 children = PyObject_Malloc(size * sizeof(PyObject*));
467 if (!children)
468 goto nomemory;
469 /* copy existing children from static area to malloc buffer */
470 memcpy(children, self->extra->children,
471 self->extra->length * sizeof(PyObject*));
472 }
473 self->extra->children = children;
474 self->extra->allocated = size;
475 }
476
477 return 0;
478
479 nomemory:
480 PyErr_NoMemory();
481 return -1;
482}
483
484LOCAL(int)
485element_add_subelement(ElementObject* self, PyObject* element)
486{
487 /* add a child element to a parent */
488
489 if (element_resize(self, 1) < 0)
490 return -1;
491
492 Py_INCREF(element);
493 self->extra->children[self->extra->length] = element;
494
495 self->extra->length++;
496
497 return 0;
498}
499
500LOCAL(PyObject*)
501element_get_attrib(ElementObject* self)
502{
503 /* return borrowed reference to attrib dictionary */
504 /* note: this function assumes that the extra section exists */
505
506 PyObject* res = self->extra->attrib;
507
508 if (res == Py_None) {
509 /* create missing dictionary */
510 res = PyDict_New();
511 if (!res)
512 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200513 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000514 self->extra->attrib = res;
515 }
516
517 return res;
518}
519
520LOCAL(PyObject*)
521element_get_text(ElementObject* self)
522{
523 /* return borrowed reference to text attribute */
524
525 PyObject* res = self->text;
526
527 if (JOIN_GET(res)) {
528 res = JOIN_OBJ(res);
529 if (PyList_CheckExact(res)) {
530 res = list_join(res);
531 if (!res)
532 return NULL;
533 self->text = res;
534 }
535 }
536
537 return res;
538}
539
540LOCAL(PyObject*)
541element_get_tail(ElementObject* self)
542{
543 /* return borrowed reference to text attribute */
544
545 PyObject* res = self->tail;
546
547 if (JOIN_GET(res)) {
548 res = JOIN_OBJ(res);
549 if (PyList_CheckExact(res)) {
550 res = list_join(res);
551 if (!res)
552 return NULL;
553 self->tail = res;
554 }
555 }
556
557 return res;
558}
559
560static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300561subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000562{
563 PyObject* elem;
564
565 ElementObject* parent;
566 PyObject* tag;
567 PyObject* attrib = NULL;
568 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
569 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800570 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000571 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800572 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573
Eli Bendersky737b1732012-05-29 06:02:56 +0300574 if (attrib) {
575 /* attrib passed as positional arg */
576 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 if (!attrib)
578 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300579 if (kwds) {
580 if (PyDict_Update(attrib, kwds) < 0) {
581 return NULL;
582 }
583 }
584 } else if (kwds) {
585 /* have keyword args */
586 attrib = get_attrib_from_keywords(kwds);
587 if (!attrib)
588 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000589 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300590 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000591 Py_INCREF(Py_None);
592 attrib = Py_None;
593 }
594
Eli Bendersky092af1f2012-03-04 07:14:03 +0200595 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000596 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200597 if (elem == NULL)
598 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000600 if (element_add_subelement(parent, elem) < 0) {
601 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000603 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000604
605 return elem;
606}
607
Eli Bendersky0192ba32012-03-30 16:38:33 +0300608static int
609element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
610{
611 Py_VISIT(self->tag);
612 Py_VISIT(JOIN_OBJ(self->text));
613 Py_VISIT(JOIN_OBJ(self->tail));
614
615 if (self->extra) {
616 int i;
617 Py_VISIT(self->extra->attrib);
618
619 for (i = 0; i < self->extra->length; ++i)
620 Py_VISIT(self->extra->children[i]);
621 }
622 return 0;
623}
624
625static int
626element_gc_clear(ElementObject *self)
627{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300628 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700629 _clear_joined_ptr(&self->text);
630 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300631
632 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300633 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300634 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300635 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636 return 0;
637}
638
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000639static void
640element_dealloc(ElementObject* self)
641{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300642 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300643
644 if (self->weakreflist != NULL)
645 PyObject_ClearWeakRefs((PyObject *) self);
646
Eli Bendersky0192ba32012-03-30 16:38:33 +0300647 /* element_gc_clear clears all references and deallocates extra
648 */
649 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000650
651 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200652 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000653}
654
655/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000656
657static PyObject*
658element_append(ElementObject* self, PyObject* args)
659{
660 PyObject* element;
661 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
662 return NULL;
663
664 if (element_add_subelement(self, element) < 0)
665 return NULL;
666
667 Py_RETURN_NONE;
668}
669
670static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300671element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000672{
673 if (!PyArg_ParseTuple(args, ":clear"))
674 return NULL;
675
Eli Benderskyebf37a22012-04-03 22:02:37 +0300676 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000677
678 Py_INCREF(Py_None);
679 Py_DECREF(JOIN_OBJ(self->text));
680 self->text = Py_None;
681
682 Py_INCREF(Py_None);
683 Py_DECREF(JOIN_OBJ(self->tail));
684 self->tail = Py_None;
685
686 Py_RETURN_NONE;
687}
688
689static PyObject*
690element_copy(ElementObject* self, PyObject* args)
691{
692 int i;
693 ElementObject* element;
694
695 if (!PyArg_ParseTuple(args, ":__copy__"))
696 return NULL;
697
Eli Bendersky092af1f2012-03-04 07:14:03 +0200698 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800699 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000700 if (!element)
701 return NULL;
702
703 Py_DECREF(JOIN_OBJ(element->text));
704 element->text = self->text;
705 Py_INCREF(JOIN_OBJ(element->text));
706
707 Py_DECREF(JOIN_OBJ(element->tail));
708 element->tail = self->tail;
709 Py_INCREF(JOIN_OBJ(element->tail));
710
711 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000712 if (element_resize(element, self->extra->length) < 0) {
713 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000714 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000715 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716
717 for (i = 0; i < self->extra->length; i++) {
718 Py_INCREF(self->extra->children[i]);
719 element->extra->children[i] = self->extra->children[i];
720 }
721
722 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723 }
724
725 return (PyObject*) element;
726}
727
728static PyObject*
729element_deepcopy(ElementObject* self, PyObject* args)
730{
731 int i;
732 ElementObject* element;
733 PyObject* tag;
734 PyObject* attrib;
735 PyObject* text;
736 PyObject* tail;
737 PyObject* id;
738
739 PyObject* memo;
740 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
741 return NULL;
742
743 tag = deepcopy(self->tag, memo);
744 if (!tag)
745 return NULL;
746
747 if (self->extra) {
748 attrib = deepcopy(self->extra->attrib, memo);
749 if (!attrib) {
750 Py_DECREF(tag);
751 return NULL;
752 }
753 } else {
754 Py_INCREF(Py_None);
755 attrib = Py_None;
756 }
757
Eli Bendersky092af1f2012-03-04 07:14:03 +0200758 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000759
760 Py_DECREF(tag);
761 Py_DECREF(attrib);
762
763 if (!element)
764 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100765
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000766 text = deepcopy(JOIN_OBJ(self->text), memo);
767 if (!text)
768 goto error;
769 Py_DECREF(element->text);
770 element->text = JOIN_SET(text, JOIN_GET(self->text));
771
772 tail = deepcopy(JOIN_OBJ(self->tail), memo);
773 if (!tail)
774 goto error;
775 Py_DECREF(element->tail);
776 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
777
778 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000779 if (element_resize(element, self->extra->length) < 0)
780 goto error;
781
782 for (i = 0; i < self->extra->length; i++) {
783 PyObject* child = deepcopy(self->extra->children[i], memo);
784 if (!child) {
785 element->extra->length = i;
786 goto error;
787 }
788 element->extra->children[i] = child;
789 }
790
791 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000792 }
793
794 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200795 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000796 if (!id)
797 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000798
799 i = PyDict_SetItem(memo, id, (PyObject*) element);
800
801 Py_DECREF(id);
802
803 if (i < 0)
804 goto error;
805
806 return (PyObject*) element;
807
808 error:
809 Py_DECREF(element);
810 return NULL;
811}
812
Martin v. Löwisbce16662012-06-17 10:41:22 +0200813static PyObject*
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200814element_sizeof(PyObject* myself, PyObject* args)
Martin v. Löwisbce16662012-06-17 10:41:22 +0200815{
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200816 ElementObject *self = (ElementObject*)myself;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200817 Py_ssize_t result = sizeof(ElementObject);
818 if (self->extra) {
819 result += sizeof(ElementObjectExtra);
820 if (self->extra->children != self->extra->_children)
821 result += sizeof(PyObject*) * self->extra->allocated;
822 }
823 return PyLong_FromSsize_t(result);
824}
825
/* Keys of the state dictionary used by getstate/setstate. */
#define PICKLED_TAG       "tag"
#define PICKLED_CHILDREN  "_children"
#define PICKLED_ATTRIB    "attrib"
#define PICKLED_TAIL      "tail"
#define PICKLED_TEXT      "text"
832
833/* __getstate__ returns a fabricated instance dict as in the pure-Python
834 * Element implementation, for interoperability/interchangeability. This
835 * makes the pure-Python implementation details an API, but (a) there aren't
836 * any unnecessary structures there; and (b) it buys compatibility with 3.2
837 * pickles. See issue #16076.
838 */
839static PyObject *
840element_getstate(ElementObject *self)
841{
842 int i, noattrib;
843 PyObject *instancedict = NULL, *children;
844
845 /* Build a list of children. */
846 children = PyList_New(self->extra ? self->extra->length : 0);
847 if (!children)
848 return NULL;
849 for (i = 0; i < PyList_GET_SIZE(children); i++) {
850 PyObject *child = self->extra->children[i];
851 Py_INCREF(child);
852 PyList_SET_ITEM(children, i, child);
853 }
854
855 /* Construct the state object. */
856 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
857 if (noattrib)
858 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
859 PICKLED_TAG, self->tag,
860 PICKLED_CHILDREN, children,
861 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700862 PICKLED_TEXT, JOIN_OBJ(self->text),
863 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800864 else
865 instancedict = Py_BuildValue("{sOsOsOsOsO}",
866 PICKLED_TAG, self->tag,
867 PICKLED_CHILDREN, children,
868 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700869 PICKLED_TEXT, JOIN_OBJ(self->text),
870 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800871 if (instancedict) {
872 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800873 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800874 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800875 else {
876 for (i = 0; i < PyList_GET_SIZE(children); i++)
877 Py_DECREF(PyList_GET_ITEM(children, i));
878 Py_DECREF(children);
879
880 return NULL;
881 }
882}
883
884static PyObject *
885element_setstate_from_attributes(ElementObject *self,
886 PyObject *tag,
887 PyObject *attrib,
888 PyObject *text,
889 PyObject *tail,
890 PyObject *children)
891{
892 Py_ssize_t i, nchildren;
893
894 if (!tag) {
895 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
896 return NULL;
897 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800898
899 Py_CLEAR(self->tag);
900 self->tag = tag;
901 Py_INCREF(self->tag);
902
Eli Benderskydd3661e2013-09-13 06:24:25 -0700903 _clear_joined_ptr(&self->text);
904 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
905 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800906
Eli Benderskydd3661e2013-09-13 06:24:25 -0700907 _clear_joined_ptr(&self->tail);
908 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
909 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800910
911 /* Handle ATTRIB and CHILDREN. */
912 if (!children && !attrib)
913 Py_RETURN_NONE;
914
915 /* Compute 'nchildren'. */
916 if (children) {
917 if (!PyList_Check(children)) {
918 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
919 return NULL;
920 }
921 nchildren = PyList_Size(children);
922 }
923 else {
924 nchildren = 0;
925 }
926
927 /* Allocate 'extra'. */
928 if (element_resize(self, nchildren)) {
929 return NULL;
930 }
931 assert(self->extra && self->extra->allocated >= nchildren);
932
933 /* Copy children */
934 for (i = 0; i < nchildren; i++) {
935 self->extra->children[i] = PyList_GET_ITEM(children, i);
936 Py_INCREF(self->extra->children[i]);
937 }
938
939 self->extra->length = nchildren;
940 self->extra->allocated = nchildren;
941
942 /* Stash attrib. */
943 if (attrib) {
944 Py_CLEAR(self->extra->attrib);
945 self->extra->attrib = attrib;
946 Py_INCREF(attrib);
947 }
948
949 Py_RETURN_NONE;
950}
951
952/* __setstate__ for Element instance from the Python implementation.
953 * 'state' should be the instance dict.
954 */
955static PyObject *
956element_setstate_from_Python(ElementObject *self, PyObject *state)
957{
958 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
959 PICKLED_TAIL, PICKLED_CHILDREN, 0};
960 PyObject *args;
961 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800962 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800963
Eli Bendersky698bdb22013-01-10 06:01:06 -0800964 tag = attrib = text = tail = children = NULL;
965 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800966 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800967 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800968
969 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
970 &attrib, &text, &tail, &children))
971 retval = element_setstate_from_attributes(self, tag, attrib, text,
972 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800973 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800974 retval = NULL;
975
976 Py_DECREF(args);
977 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800978}
979
980static PyObject *
981element_setstate(ElementObject *self, PyObject *state)
982{
983 if (!PyDict_CheckExact(state)) {
984 PyErr_Format(PyExc_TypeError,
985 "Don't know how to unpickle \"%.200R\" as an Element",
986 state);
987 return NULL;
988 }
989 else
990 return element_setstate_from_Python(self, state);
991}
992
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000993LOCAL(int)
994checkpath(PyObject* tag)
995{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000996 Py_ssize_t i;
997 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000998
999 /* check if a tag contains an xpath character */
1000
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001001#define PATHCHAR(ch) \
1002 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001003
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001004 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1006 void *data = PyUnicode_DATA(tag);
1007 unsigned int kind = PyUnicode_KIND(tag);
1008 for (i = 0; i < len; i++) {
1009 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1010 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001011 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001012 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001013 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001014 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001015 return 1;
1016 }
1017 return 0;
1018 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001019 if (PyBytes_Check(tag)) {
1020 char *p = PyBytes_AS_STRING(tag);
1021 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001022 if (p[i] == '{')
1023 check = 0;
1024 else if (p[i] == '}')
1025 check = 1;
1026 else if (check && PATHCHAR(p[i]))
1027 return 1;
1028 }
1029 return 0;
1030 }
1031
1032 return 1; /* unknown type; might be path expression */
1033}
1034
1035static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001036element_extend(ElementObject* self, PyObject* args)
1037{
1038 PyObject* seq;
1039 Py_ssize_t i, seqlen = 0;
1040
1041 PyObject* seq_in;
1042 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1043 return NULL;
1044
1045 seq = PySequence_Fast(seq_in, "");
1046 if (!seq) {
1047 PyErr_Format(
1048 PyExc_TypeError,
1049 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1050 );
1051 return NULL;
1052 }
1053
1054 seqlen = PySequence_Size(seq);
1055 for (i = 0; i < seqlen; i++) {
1056 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001057 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1058 Py_DECREF(seq);
1059 PyErr_Format(
1060 PyExc_TypeError,
1061 "expected an Element, not \"%.200s\"",
1062 Py_TYPE(element)->tp_name);
1063 return NULL;
1064 }
1065
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001066 if (element_add_subelement(self, element) < 0) {
1067 Py_DECREF(seq);
1068 return NULL;
1069 }
1070 }
1071
1072 Py_DECREF(seq);
1073
1074 Py_RETURN_NONE;
1075}
1076
1077static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001078element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001079{
1080 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001082 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001083 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001084 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001085
Eli Bendersky737b1732012-05-29 06:02:56 +03001086 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1087 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 return NULL;
1089
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001090 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001091 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001092 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001093 st->elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001094 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001095 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001096
1097 if (!self->extra)
1098 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001099
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001100 for (i = 0; i < self->extra->length; i++) {
1101 PyObject* item = self->extra->children[i];
1102 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001103 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001104 Py_INCREF(item);
1105 return item;
1106 }
1107 }
1108
1109 Py_RETURN_NONE;
1110}
1111
1112static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001113element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001114{
1115 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001116 PyObject* tag;
1117 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001118 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001119 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001120 static char *kwlist[] = {"path", "default", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001121 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001122
Eli Bendersky737b1732012-05-29 06:02:56 +03001123 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1124 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001125 return NULL;
1126
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001127 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001128 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001129 st->elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001130 );
1131
1132 if (!self->extra) {
1133 Py_INCREF(default_value);
1134 return default_value;
1135 }
1136
1137 for (i = 0; i < self->extra->length; i++) {
1138 ElementObject* item = (ElementObject*) self->extra->children[i];
Eli Bendersky163d7f02013-11-24 06:55:04 -08001139 if (Element_CheckExact(item) &&
1140 (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001141 PyObject* text = element_get_text(item);
1142 if (text == Py_None)
Eli Bendersky25771b32013-01-13 05:26:07 -08001143 return PyUnicode_New(0, 0);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001144 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001145 return text;
1146 }
1147 }
1148
1149 Py_INCREF(default_value);
1150 return default_value;
1151}
1152
1153static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001154element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001155{
1156 int i;
1157 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001158 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001159 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001160 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001161 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001162
Eli Bendersky737b1732012-05-29 06:02:56 +03001163 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1164 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001165 return NULL;
1166
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001167 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001168 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001169 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001170 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001171 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001172 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001173
1174 out = PyList_New(0);
1175 if (!out)
1176 return NULL;
1177
1178 if (!self->extra)
1179 return out;
1180
1181 for (i = 0; i < self->extra->length; i++) {
1182 PyObject* item = self->extra->children[i];
1183 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001184 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001185 if (PyList_Append(out, item) < 0) {
1186 Py_DECREF(out);
1187 return NULL;
1188 }
1189 }
1190 }
1191
1192 return out;
1193}
1194
1195static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001196element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001197{
1198 PyObject* tag;
1199 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001200 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001201 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001202 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001203
Eli Bendersky737b1732012-05-29 06:02:56 +03001204 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
Eli Bendersky163d7f02013-11-24 06:55:04 -08001205 &tag, &namespaces)) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001206 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -08001207 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001208
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001209 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001210 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001211}
1212
1213static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001214element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001215{
1216 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001217 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001218
1219 PyObject* key;
1220 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001221
1222 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1223 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001224 return NULL;
1225
1226 if (!self->extra || self->extra->attrib == Py_None)
1227 value = default_value;
1228 else {
1229 value = PyDict_GetItem(self->extra->attrib, key);
1230 if (!value)
1231 value = default_value;
1232 }
1233
1234 Py_INCREF(value);
1235 return value;
1236}
1237
1238static PyObject*
1239element_getchildren(ElementObject* self, PyObject* args)
1240{
1241 int i;
1242 PyObject* list;
1243
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001244 /* FIXME: report as deprecated? */
1245
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001246 if (!PyArg_ParseTuple(args, ":getchildren"))
1247 return NULL;
1248
1249 if (!self->extra)
1250 return PyList_New(0);
1251
1252 list = PyList_New(self->extra->length);
1253 if (!list)
1254 return NULL;
1255
1256 for (i = 0; i < self->extra->length; i++) {
1257 PyObject* item = self->extra->children[i];
1258 Py_INCREF(item);
1259 PyList_SET_ITEM(list, i, item);
1260 }
1261
1262 return list;
1263}
1264
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001265
Eli Bendersky64d11e62012-06-15 07:42:50 +03001266static PyObject *
1267create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1268
1269
1270static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001271element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001272{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001273 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001274 static char* kwlist[] = {"tag", 0};
1275
1276 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277 return NULL;
1278
Eli Bendersky64d11e62012-06-15 07:42:50 +03001279 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001280}
1281
1282
1283static PyObject*
1284element_itertext(ElementObject* self, PyObject* args)
1285{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001286 if (!PyArg_ParseTuple(args, ":itertext"))
1287 return NULL;
1288
Eli Bendersky64d11e62012-06-15 07:42:50 +03001289 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001290}
1291
Eli Bendersky64d11e62012-06-15 07:42:50 +03001292
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001293static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001294element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001295{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001296 ElementObject* self = (ElementObject*) self_;
1297
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001298 if (!self->extra || index < 0 || index >= self->extra->length) {
1299 PyErr_SetString(
1300 PyExc_IndexError,
1301 "child index out of range"
1302 );
1303 return NULL;
1304 }
1305
1306 Py_INCREF(self->extra->children[index]);
1307 return self->extra->children[index];
1308}
1309
1310static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001311element_insert(ElementObject* self, PyObject* args)
1312{
1313 int i;
1314
1315 int index;
1316 PyObject* element;
1317 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1318 &Element_Type, &element))
1319 return NULL;
1320
Victor Stinner5f0af232013-07-11 23:01:36 +02001321 if (!self->extra) {
1322 if (create_extra(self, NULL) < 0)
1323 return NULL;
1324 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001325
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001326 if (index < 0) {
1327 index += self->extra->length;
1328 if (index < 0)
1329 index = 0;
1330 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001331 if (index > self->extra->length)
1332 index = self->extra->length;
1333
1334 if (element_resize(self, 1) < 0)
1335 return NULL;
1336
1337 for (i = self->extra->length; i > index; i--)
1338 self->extra->children[i] = self->extra->children[i-1];
1339
1340 Py_INCREF(element);
1341 self->extra->children[index] = element;
1342
1343 self->extra->length++;
1344
1345 Py_RETURN_NONE;
1346}
1347
1348static PyObject*
1349element_items(ElementObject* self, PyObject* args)
1350{
1351 if (!PyArg_ParseTuple(args, ":items"))
1352 return NULL;
1353
1354 if (!self->extra || self->extra->attrib == Py_None)
1355 return PyList_New(0);
1356
1357 return PyDict_Items(self->extra->attrib);
1358}
1359
1360static PyObject*
1361element_keys(ElementObject* self, PyObject* args)
1362{
1363 if (!PyArg_ParseTuple(args, ":keys"))
1364 return NULL;
1365
1366 if (!self->extra || self->extra->attrib == Py_None)
1367 return PyList_New(0);
1368
1369 return PyDict_Keys(self->extra->attrib);
1370}
1371
Martin v. Löwis18e16552006-02-15 17:27:45 +00001372static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001373element_length(ElementObject* self)
1374{
1375 if (!self->extra)
1376 return 0;
1377
1378 return self->extra->length;
1379}
1380
1381static PyObject*
1382element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1383{
1384 PyObject* elem;
1385
1386 PyObject* tag;
1387 PyObject* attrib;
1388 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1389 return NULL;
1390
1391 attrib = PyDict_Copy(attrib);
1392 if (!attrib)
1393 return NULL;
1394
Eli Bendersky092af1f2012-03-04 07:14:03 +02001395 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001396
1397 Py_DECREF(attrib);
1398
1399 return elem;
1400}
1401
1402static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001403element_remove(ElementObject* self, PyObject* args)
1404{
1405 int i;
1406
1407 PyObject* element;
1408 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1409 return NULL;
1410
1411 if (!self->extra) {
1412 /* element has no children, so raise exception */
1413 PyErr_SetString(
1414 PyExc_ValueError,
1415 "list.remove(x): x not in list"
1416 );
1417 return NULL;
1418 }
1419
1420 for (i = 0; i < self->extra->length; i++) {
1421 if (self->extra->children[i] == element)
1422 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001423 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001424 break;
1425 }
1426
1427 if (i == self->extra->length) {
1428 /* element is not in children, so raise exception */
1429 PyErr_SetString(
1430 PyExc_ValueError,
1431 "list.remove(x): x not in list"
1432 );
1433 return NULL;
1434 }
1435
1436 Py_DECREF(self->extra->children[i]);
1437
1438 self->extra->length--;
1439
1440 for (; i < self->extra->length; i++)
1441 self->extra->children[i] = self->extra->children[i+1];
1442
1443 Py_RETURN_NONE;
1444}
1445
1446static PyObject*
1447element_repr(ElementObject* self)
1448{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001449 if (self->tag)
1450 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1451 else
1452 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001453}
1454
1455static PyObject*
1456element_set(ElementObject* self, PyObject* args)
1457{
1458 PyObject* attrib;
1459
1460 PyObject* key;
1461 PyObject* value;
1462 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1463 return NULL;
1464
Victor Stinner5f0af232013-07-11 23:01:36 +02001465 if (!self->extra) {
1466 if (create_extra(self, NULL) < 0)
1467 return NULL;
1468 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001469
1470 attrib = element_get_attrib(self);
1471 if (!attrib)
1472 return NULL;
1473
1474 if (PyDict_SetItem(attrib, key, value) < 0)
1475 return NULL;
1476
1477 Py_RETURN_NONE;
1478}
1479
1480static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001481element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001482{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001483 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001484 int i;
1485 PyObject* old;
1486
1487 if (!self->extra || index < 0 || index >= self->extra->length) {
1488 PyErr_SetString(
1489 PyExc_IndexError,
1490 "child assignment index out of range");
1491 return -1;
1492 }
1493
1494 old = self->extra->children[index];
1495
1496 if (item) {
1497 Py_INCREF(item);
1498 self->extra->children[index] = item;
1499 } else {
1500 self->extra->length--;
1501 for (i = index; i < self->extra->length; i++)
1502 self->extra->children[i] = self->extra->children[i+1];
1503 }
1504
1505 Py_DECREF(old);
1506
1507 return 0;
1508}
1509
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001510static PyObject*
1511element_subscr(PyObject* self_, PyObject* item)
1512{
1513 ElementObject* self = (ElementObject*) self_;
1514
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001515 if (PyIndex_Check(item)) {
1516 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001517
1518 if (i == -1 && PyErr_Occurred()) {
1519 return NULL;
1520 }
1521 if (i < 0 && self->extra)
1522 i += self->extra->length;
1523 return element_getitem(self_, i);
1524 }
1525 else if (PySlice_Check(item)) {
1526 Py_ssize_t start, stop, step, slicelen, cur, i;
1527 PyObject* list;
1528
1529 if (!self->extra)
1530 return PyList_New(0);
1531
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001532 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001533 self->extra->length,
1534 &start, &stop, &step, &slicelen) < 0) {
1535 return NULL;
1536 }
1537
1538 if (slicelen <= 0)
1539 return PyList_New(0);
1540 else {
1541 list = PyList_New(slicelen);
1542 if (!list)
1543 return NULL;
1544
1545 for (cur = start, i = 0; i < slicelen;
1546 cur += step, i++) {
1547 PyObject* item = self->extra->children[cur];
1548 Py_INCREF(item);
1549 PyList_SET_ITEM(list, i, item);
1550 }
1551
1552 return list;
1553 }
1554 }
1555 else {
1556 PyErr_SetString(PyExc_TypeError,
1557 "element indices must be integers");
1558 return NULL;
1559 }
1560}
1561
1562static int
1563element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1564{
1565 ElementObject* self = (ElementObject*) self_;
1566
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001567 if (PyIndex_Check(item)) {
1568 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001569
1570 if (i == -1 && PyErr_Occurred()) {
1571 return -1;
1572 }
1573 if (i < 0 && self->extra)
1574 i += self->extra->length;
1575 return element_setitem(self_, i, value);
1576 }
1577 else if (PySlice_Check(item)) {
1578 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1579
1580 PyObject* recycle = NULL;
1581 PyObject* seq = NULL;
1582
Victor Stinner5f0af232013-07-11 23:01:36 +02001583 if (!self->extra) {
1584 if (create_extra(self, NULL) < 0)
1585 return -1;
1586 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001587
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001588 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001589 self->extra->length,
1590 &start, &stop, &step, &slicelen) < 0) {
1591 return -1;
1592 }
1593
Eli Bendersky865756a2012-03-09 13:38:15 +02001594 if (value == NULL) {
1595 /* Delete slice */
1596 size_t cur;
1597 Py_ssize_t i;
1598
1599 if (slicelen <= 0)
1600 return 0;
1601
1602 /* Since we're deleting, the direction of the range doesn't matter,
1603 * so for simplicity make it always ascending.
1604 */
1605 if (step < 0) {
1606 stop = start + 1;
1607 start = stop + step * (slicelen - 1) - 1;
1608 step = -step;
1609 }
1610
1611 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1612
1613 /* recycle is a list that will contain all the children
1614 * scheduled for removal.
1615 */
1616 if (!(recycle = PyList_New(slicelen))) {
1617 PyErr_NoMemory();
1618 return -1;
1619 }
1620
1621 /* This loop walks over all the children that have to be deleted,
1622 * with cur pointing at them. num_moved is the amount of children
1623 * until the next deleted child that have to be "shifted down" to
1624 * occupy the deleted's places.
1625 * Note that in the ith iteration, shifting is done i+i places down
1626 * because i children were already removed.
1627 */
1628 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1629 /* Compute how many children have to be moved, clipping at the
1630 * list end.
1631 */
1632 Py_ssize_t num_moved = step - 1;
1633 if (cur + step >= (size_t)self->extra->length) {
1634 num_moved = self->extra->length - cur - 1;
1635 }
1636
1637 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1638
1639 memmove(
1640 self->extra->children + cur - i,
1641 self->extra->children + cur + 1,
1642 num_moved * sizeof(PyObject *));
1643 }
1644
1645 /* Leftover "tail" after the last removed child */
1646 cur = start + (size_t)slicelen * step;
1647 if (cur < (size_t)self->extra->length) {
1648 memmove(
1649 self->extra->children + cur - slicelen,
1650 self->extra->children + cur,
1651 (self->extra->length - cur) * sizeof(PyObject *));
1652 }
1653
1654 self->extra->length -= slicelen;
1655
1656 /* Discard the recycle list with all the deleted sub-elements */
1657 Py_XDECREF(recycle);
1658 return 0;
1659 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001660 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001661 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001662 seq = PySequence_Fast(value, "");
1663 if (!seq) {
1664 PyErr_Format(
1665 PyExc_TypeError,
1666 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1667 );
1668 return -1;
1669 }
1670 newlen = PySequence_Size(seq);
1671 }
1672
1673 if (step != 1 && newlen != slicelen)
1674 {
1675 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001676 "attempt to assign sequence of size %zd "
1677 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001678 newlen, slicelen
1679 );
1680 return -1;
1681 }
1682
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001683 /* Resize before creating the recycle bin, to prevent refleaks. */
1684 if (newlen > slicelen) {
1685 if (element_resize(self, newlen - slicelen) < 0) {
1686 if (seq) {
1687 Py_DECREF(seq);
1688 }
1689 return -1;
1690 }
1691 }
1692
1693 if (slicelen > 0) {
1694 /* to avoid recursive calls to this method (via decref), move
1695 old items to the recycle bin here, and get rid of them when
1696 we're done modifying the element */
1697 recycle = PyList_New(slicelen);
1698 if (!recycle) {
1699 if (seq) {
1700 Py_DECREF(seq);
1701 }
1702 return -1;
1703 }
1704 for (cur = start, i = 0; i < slicelen;
1705 cur += step, i++)
1706 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1707 }
1708
1709 if (newlen < slicelen) {
1710 /* delete slice */
1711 for (i = stop; i < self->extra->length; i++)
1712 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1713 } else if (newlen > slicelen) {
1714 /* insert slice */
1715 for (i = self->extra->length-1; i >= stop; i--)
1716 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1717 }
1718
1719 /* replace the slice */
1720 for (cur = start, i = 0; i < newlen;
1721 cur += step, i++) {
1722 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1723 Py_INCREF(element);
1724 self->extra->children[cur] = element;
1725 }
1726
1727 self->extra->length += newlen - slicelen;
1728
1729 if (seq) {
1730 Py_DECREF(seq);
1731 }
1732
1733 /* discard the recycle bin, and everything in it */
1734 Py_XDECREF(recycle);
1735
1736 return 0;
1737 }
1738 else {
1739 PyErr_SetString(PyExc_TypeError,
1740 "element indices must be integers");
1741 return -1;
1742 }
1743}
1744
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001745static PyMethodDef element_methods[] = {
1746
Eli Bendersky0192ba32012-03-30 16:38:33 +03001747 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001748
Eli Benderskya8736902013-01-05 06:26:39 -08001749 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001750 {"set", (PyCFunction) element_set, METH_VARARGS},
1751
Eli Bendersky737b1732012-05-29 06:02:56 +03001752 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1753 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1754 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001755
1756 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001757 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001758 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1759 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1760
Eli Benderskya8736902013-01-05 06:26:39 -08001761 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001762 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001763 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001764
Eli Benderskya8736902013-01-05 06:26:39 -08001765 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001766 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1767
1768 {"items", (PyCFunction) element_items, METH_VARARGS},
1769 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1770
1771 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1772
1773 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1774 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001775 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001776 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1777 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001778
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001779 {NULL, NULL}
1780};
1781
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001782static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001783element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001784{
1785 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001786 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001787
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001788 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001789 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001790
Alexander Belopolskye239d232010-12-08 23:31:48 +00001791 if (name == NULL)
1792 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001793
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001794 /* handle common attributes first */
1795 if (strcmp(name, "tag") == 0) {
1796 res = self->tag;
1797 Py_INCREF(res);
1798 return res;
1799 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001800 res = element_get_text(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02001801 Py_XINCREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001802 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001803 }
1804
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001805 /* methods */
1806 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1807 if (res)
1808 return res;
1809
1810 /* less common attributes */
1811 if (strcmp(name, "tail") == 0) {
1812 PyErr_Clear();
1813 res = element_get_tail(self);
1814 } else if (strcmp(name, "attrib") == 0) {
1815 PyErr_Clear();
Victor Stinner5f0af232013-07-11 23:01:36 +02001816 if (!self->extra) {
1817 if (create_extra(self, NULL) < 0)
1818 return NULL;
1819 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001820 res = element_get_attrib(self);
1821 }
1822
1823 if (!res)
1824 return NULL;
1825
1826 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001827 return res;
1828}
1829
Eli Benderskyef9683b2013-05-18 07:52:34 -07001830static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001831element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001832{
Eli Benderskyb20df952012-05-20 06:33:29 +03001833 char *name = "";
1834 if (PyUnicode_Check(nameobj))
1835 name = _PyUnicode_AsString(nameobj);
Victor Stinner4d463432013-07-11 23:05:03 +02001836 if (name == NULL)
Eli Benderskyef9683b2013-05-18 07:52:34 -07001837 return -1;
Victor Stinner4d463432013-07-11 23:05:03 +02001838
1839 if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001840 Py_DECREF(self->tag);
1841 self->tag = value;
1842 Py_INCREF(self->tag);
1843 } else if (strcmp(name, "text") == 0) {
1844 Py_DECREF(JOIN_OBJ(self->text));
1845 self->text = value;
1846 Py_INCREF(self->text);
1847 } else if (strcmp(name, "tail") == 0) {
1848 Py_DECREF(JOIN_OBJ(self->tail));
1849 self->tail = value;
1850 Py_INCREF(self->tail);
1851 } else if (strcmp(name, "attrib") == 0) {
Victor Stinner5f0af232013-07-11 23:01:36 +02001852 if (!self->extra) {
1853 if (create_extra(self, NULL) < 0)
1854 return -1;
1855 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001856 Py_DECREF(self->extra->attrib);
1857 self->extra->attrib = value;
1858 Py_INCREF(self->extra->attrib);
1859 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001860 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001861 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001862 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001863 }
1864
Eli Benderskyef9683b2013-05-18 07:52:34 -07001865 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001866}
1867
1868static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001869 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001870 0, /* sq_concat */
1871 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001872 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001873 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001874 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001875 0,
1876};
1877
1878static PyMappingMethods element_as_mapping = {
1879 (lenfunc) element_length,
1880 (binaryfunc) element_subscr,
1881 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001882};
1883
Neal Norwitz227b5332006-03-22 09:28:35 +00001884static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001885 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001886 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001887 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001888 (destructor)element_dealloc, /* tp_dealloc */
1889 0, /* tp_print */
1890 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001891 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001892 0, /* tp_reserved */
1893 (reprfunc)element_repr, /* tp_repr */
1894 0, /* tp_as_number */
1895 &element_as_sequence, /* tp_as_sequence */
1896 &element_as_mapping, /* tp_as_mapping */
1897 0, /* tp_hash */
1898 0, /* tp_call */
1899 0, /* tp_str */
1900 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001901 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001902 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001903 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1904 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001905 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001906 (traverseproc)element_gc_traverse, /* tp_traverse */
1907 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001908 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001909 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001910 0, /* tp_iter */
1911 0, /* tp_iternext */
1912 element_methods, /* tp_methods */
1913 0, /* tp_members */
1914 0, /* tp_getset */
1915 0, /* tp_base */
1916 0, /* tp_dict */
1917 0, /* tp_descr_get */
1918 0, /* tp_descr_set */
1919 0, /* tp_dictoffset */
1920 (initproc)element_init, /* tp_init */
1921 PyType_GenericAlloc, /* tp_alloc */
1922 element_new, /* tp_new */
1923 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001924};
1925
Eli Bendersky64d11e62012-06-15 07:42:50 +03001926/******************************* Element iterator ****************************/
1927
/* ElementIterObject represents the state of a pre-order traversal over an
 * XML element.  To keep track of which sub-element should be returned next,
 * a stack of parents is maintained; this is the standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is a singly linked list starting at parent_stack.  Each stack
 * node holds the saved parent to return to once the current element is
 * exhausted, and the index of the next child to examine in that parent.
 */
1936typedef struct ParentLocator_t {
1937 ElementObject *parent;
1938 Py_ssize_t child_index;
1939 struct ParentLocator_t *next;
1940} ParentLocator;
1941
1942typedef struct {
1943 PyObject_HEAD
1944 ParentLocator *parent_stack;
1945 ElementObject *root_element;
1946 PyObject *sought_tag;
1947 int root_done;
1948 int gettext;
1949} ElementIterObject;
1950
1951
1952static void
1953elementiter_dealloc(ElementIterObject *it)
1954{
1955 ParentLocator *p = it->parent_stack;
1956 while (p) {
1957 ParentLocator *temp = p;
1958 Py_XDECREF(p->parent);
1959 p = p->next;
1960 PyObject_Free(temp);
1961 }
1962
1963 Py_XDECREF(it->sought_tag);
1964 Py_XDECREF(it->root_element);
1965
1966 PyObject_GC_UnTrack(it);
1967 PyObject_GC_Del(it);
1968}
1969
1970static int
1971elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1972{
1973 ParentLocator *p = it->parent_stack;
1974 while (p) {
1975 Py_VISIT(p->parent);
1976 p = p->next;
1977 }
1978
1979 Py_VISIT(it->root_element);
1980 Py_VISIT(it->sought_tag);
1981 return 0;
1982}
1983
1984/* Helper function for elementiter_next. Add a new parent to the parent stack.
1985 */
1986static ParentLocator *
1987parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1988{
1989 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1990 if (new_node) {
1991 new_node->parent = parent;
1992 Py_INCREF(parent);
1993 new_node->child_index = 0;
1994 new_node->next = stack;
1995 }
1996 return new_node;
1997}
1998
1999static PyObject *
2000elementiter_next(ElementIterObject *it)
2001{
2002 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002003 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002004 * A short note on gettext: this function serves both the iter() and
2005 * itertext() methods to avoid code duplication. However, there are a few
2006 * small differences in the way these iterations work. Namely:
2007 * - itertext() only yields text from nodes that have it, and continues
2008 * iterating when a node doesn't have text (so it doesn't return any
2009 * node like iter())
2010 * - itertext() also has to handle tail, after finishing with all the
2011 * children of a node.
2012 */
Eli Bendersky113da642012-06-15 07:52:49 +03002013 ElementObject *cur_parent;
2014 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002015
2016 while (1) {
2017 /* Handle the case reached in the beginning and end of iteration, where
2018 * the parent stack is empty. The root_done flag gives us indication
2019 * whether we've just started iterating (so root_done is 0), in which
2020 * case the root is returned. If root_done is 1 and we're here, the
2021 * iterator is exhausted.
2022 */
2023 if (!it->parent_stack->parent) {
2024 if (it->root_done) {
2025 PyErr_SetNone(PyExc_StopIteration);
2026 return NULL;
2027 } else {
2028 it->parent_stack = parent_stack_push_new(it->parent_stack,
2029 it->root_element);
2030 if (!it->parent_stack) {
2031 PyErr_NoMemory();
2032 return NULL;
2033 }
2034
2035 it->root_done = 1;
2036 if (it->sought_tag == Py_None ||
2037 PyObject_RichCompareBool(it->root_element->tag,
2038 it->sought_tag, Py_EQ) == 1) {
2039 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002040 PyObject *text = element_get_text(it->root_element);
2041 if (!text)
2042 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002043 if (PyObject_IsTrue(text)) {
2044 Py_INCREF(text);
2045 return text;
2046 }
2047 } else {
2048 Py_INCREF(it->root_element);
2049 return (PyObject *)it->root_element;
2050 }
2051 }
2052 }
2053 }
2054
2055 /* See if there are children left to traverse in the current parent. If
2056 * yes, visit the next child. If not, pop the stack and try again.
2057 */
Eli Bendersky113da642012-06-15 07:52:49 +03002058 cur_parent = it->parent_stack->parent;
2059 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002060 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2061 ElementObject *child = (ElementObject *)
2062 cur_parent->extra->children[child_index];
2063 it->parent_stack->child_index++;
2064 it->parent_stack = parent_stack_push_new(it->parent_stack,
2065 child);
2066 if (!it->parent_stack) {
2067 PyErr_NoMemory();
2068 return NULL;
2069 }
2070
2071 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002072 PyObject *text = element_get_text(child);
2073 if (!text)
2074 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002075 if (PyObject_IsTrue(text)) {
2076 Py_INCREF(text);
2077 return text;
2078 }
2079 } else if (it->sought_tag == Py_None ||
2080 PyObject_RichCompareBool(child->tag,
2081 it->sought_tag, Py_EQ) == 1) {
2082 Py_INCREF(child);
2083 return (PyObject *)child;
2084 }
2085 else
2086 continue;
2087 }
2088 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002089 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002090 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002091 if (it->gettext) {
2092 tail = element_get_tail(cur_parent);
2093 if (!tail)
2094 return NULL;
2095 }
2096 else
2097 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002098 Py_XDECREF(it->parent_stack->parent);
2099 PyObject_Free(it->parent_stack);
2100 it->parent_stack = next;
2101
2102 /* Note that extra condition on it->parent_stack->parent here;
2103 * this is because itertext() is supposed to only return *inner*
2104 * text, not text following the element it began iteration with.
2105 */
2106 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2107 Py_INCREF(tail);
2108 return tail;
2109 }
2110 }
2111 }
2112
2113 return NULL;
2114}
2115
2116
2117static PyTypeObject ElementIter_Type = {
2118 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002119 /* Using the module's name since the pure-Python implementation does not
2120 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002121 "_elementtree._element_iterator", /* tp_name */
2122 sizeof(ElementIterObject), /* tp_basicsize */
2123 0, /* tp_itemsize */
2124 /* methods */
2125 (destructor)elementiter_dealloc, /* tp_dealloc */
2126 0, /* tp_print */
2127 0, /* tp_getattr */
2128 0, /* tp_setattr */
2129 0, /* tp_reserved */
2130 0, /* tp_repr */
2131 0, /* tp_as_number */
2132 0, /* tp_as_sequence */
2133 0, /* tp_as_mapping */
2134 0, /* tp_hash */
2135 0, /* tp_call */
2136 0, /* tp_str */
2137 0, /* tp_getattro */
2138 0, /* tp_setattro */
2139 0, /* tp_as_buffer */
2140 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2141 0, /* tp_doc */
2142 (traverseproc)elementiter_traverse, /* tp_traverse */
2143 0, /* tp_clear */
2144 0, /* tp_richcompare */
2145 0, /* tp_weaklistoffset */
2146 PyObject_SelfIter, /* tp_iter */
2147 (iternextfunc)elementiter_next, /* tp_iternext */
2148 0, /* tp_methods */
2149 0, /* tp_members */
2150 0, /* tp_getset */
2151 0, /* tp_base */
2152 0, /* tp_dict */
2153 0, /* tp_descr_get */
2154 0, /* tp_descr_set */
2155 0, /* tp_dictoffset */
2156 0, /* tp_init */
2157 0, /* tp_alloc */
2158 0, /* tp_new */
2159};
2160
2161
2162static PyObject *
2163create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2164{
2165 ElementIterObject *it;
2166 PyObject *star = NULL;
2167
2168 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2169 if (!it)
2170 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002171
2172 if (PyUnicode_Check(tag))
2173 star = PyUnicode_FromString("*");
2174 else if (PyBytes_Check(tag))
2175 star = PyBytes_FromString("*");
2176
2177 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2178 tag = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002179 Py_XDECREF(star);
Victor Stinner4d463432013-07-11 23:05:03 +02002180
2181 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002182 it->sought_tag = tag;
2183 it->root_done = 0;
2184 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002185 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002186 it->root_element = self;
2187
Eli Bendersky64d11e62012-06-15 07:42:50 +03002188 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002189
2190 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2191 if (it->parent_stack == NULL) {
2192 Py_DECREF(it);
2193 PyErr_NoMemory();
2194 return NULL;
2195 }
2196 it->parent_stack->parent = NULL;
2197 it->parent_stack->child_index = 0;
2198 it->parent_stack->next = NULL;
2199
Eli Bendersky64d11e62012-06-15 07:42:50 +03002200 return (PyObject *)it;
2201}
2202
2203
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002204/* ==================================================================== */
2205/* the tree builder type */
2206
2207typedef struct {
2208 PyObject_HEAD
2209
Eli Bendersky58d548d2012-05-29 15:45:16 +03002210 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002211
Antoine Pitrouee329312012-10-04 19:53:29 +02002212 PyObject *this; /* current node */
2213 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002214
Eli Bendersky58d548d2012-05-29 15:45:16 +03002215 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002216
Eli Bendersky58d548d2012-05-29 15:45:16 +03002217 PyObject *stack; /* element stack */
2218 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002219
Eli Bendersky48d358b2012-05-30 17:57:50 +03002220 PyObject *element_factory;
2221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002222 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002223 PyObject *events; /* list of events, or NULL if not collecting */
2224 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2225 PyObject *end_event_obj;
2226 PyObject *start_ns_event_obj;
2227 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002228} TreeBuilderObject;
2229
/* True when op's type is exactly TreeBuilder_Type (subclasses excluded). */
#define TreeBuilder_CheckExact(op) (&TreeBuilder_Type == Py_TYPE(op))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002231
2232/* -------------------------------------------------------------------- */
2233/* constructor and destructor */
2234
Eli Bendersky58d548d2012-05-29 15:45:16 +03002235static PyObject *
2236treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002237{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002238 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2239 if (t != NULL) {
2240 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002241
Eli Bendersky58d548d2012-05-29 15:45:16 +03002242 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002243 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002244 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002245 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002246
Eli Bendersky58d548d2012-05-29 15:45:16 +03002247 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002248 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002249 t->stack = PyList_New(20);
2250 if (!t->stack) {
2251 Py_DECREF(t->this);
2252 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002253 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002254 return NULL;
2255 }
2256 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002257
Eli Bendersky58d548d2012-05-29 15:45:16 +03002258 t->events = NULL;
2259 t->start_event_obj = t->end_event_obj = NULL;
2260 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2261 }
2262 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002263}
2264
Eli Bendersky58d548d2012-05-29 15:45:16 +03002265static int
2266treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002267{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002268 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002269 PyObject *element_factory = NULL;
2270 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002271 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002272
2273 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2274 &element_factory)) {
2275 return -1;
2276 }
2277
2278 if (element_factory) {
2279 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002280 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002281 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002282 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002283 }
2284
Eli Bendersky58d548d2012-05-29 15:45:16 +03002285 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002286}
2287
Eli Bendersky48d358b2012-05-30 17:57:50 +03002288static int
2289treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2290{
2291 Py_VISIT(self->root);
2292 Py_VISIT(self->this);
2293 Py_VISIT(self->last);
2294 Py_VISIT(self->data);
2295 Py_VISIT(self->stack);
2296 Py_VISIT(self->element_factory);
2297 return 0;
2298}
2299
2300static int
2301treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002302{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002303 Py_CLEAR(self->end_ns_event_obj);
2304 Py_CLEAR(self->start_ns_event_obj);
2305 Py_CLEAR(self->end_event_obj);
2306 Py_CLEAR(self->start_event_obj);
2307 Py_CLEAR(self->events);
2308 Py_CLEAR(self->stack);
2309 Py_CLEAR(self->data);
2310 Py_CLEAR(self->last);
2311 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002312 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002313 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002314 return 0;
2315}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002316
Eli Bendersky48d358b2012-05-30 17:57:50 +03002317static void
2318treebuilder_dealloc(TreeBuilderObject *self)
2319{
2320 PyObject_GC_UnTrack(self);
2321 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002322 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002323}
2324
2325/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002326/* helpers for handling of arbitrary element-like objects */
2327
2328static int
2329treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2330 PyObject **dest, _Py_Identifier *name)
2331{
2332 if (Element_CheckExact(element)) {
2333 Py_DECREF(JOIN_OBJ(*dest));
2334 *dest = JOIN_SET(data, PyList_CheckExact(data));
2335 return 0;
2336 }
2337 else {
2338 PyObject *joined = list_join(data);
2339 int r;
2340 if (joined == NULL)
2341 return -1;
2342 r = _PyObject_SetAttrId(element, name, joined);
2343 Py_DECREF(joined);
2344 return r;
2345 }
2346}
2347
2348/* These two functions steal a reference to data */
2349static int
2350treebuilder_set_element_text(PyObject *element, PyObject *data)
2351{
2352 _Py_IDENTIFIER(text);
2353 return treebuilder_set_element_text_or_tail(
2354 element, data, &((ElementObject *) element)->text, &PyId_text);
2355}
2356
2357static int
2358treebuilder_set_element_tail(PyObject *element, PyObject *data)
2359{
2360 _Py_IDENTIFIER(tail);
2361 return treebuilder_set_element_text_or_tail(
2362 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2363}
2364
2365static int
2366treebuilder_add_subelement(PyObject *element, PyObject *child)
2367{
2368 _Py_IDENTIFIER(append);
2369 if (Element_CheckExact(element)) {
2370 ElementObject *elem = (ElementObject *) element;
2371 return element_add_subelement(elem, child);
2372 }
2373 else {
2374 PyObject *res;
2375 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2376 if (res == NULL)
2377 return -1;
2378 Py_DECREF(res);
2379 return 0;
2380 }
2381}
2382
2383/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002384/* handlers */
2385
2386LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002387treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2388 PyObject* attrib)
2389{
2390 PyObject* node;
2391 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002392 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002393
2394 if (self->data) {
2395 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002396 if (treebuilder_set_element_text(self->last, self->data))
2397 return NULL;
2398 }
2399 else {
2400 if (treebuilder_set_element_tail(self->last, self->data))
2401 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002402 }
2403 self->data = NULL;
2404 }
2405
Eli Bendersky08231a92013-05-18 15:47:16 -07002406 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002407 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2408 } else {
2409 node = create_new_element(tag, attrib);
2410 }
2411 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002412 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002413 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002414
Antoine Pitrouee329312012-10-04 19:53:29 +02002415 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002416
2417 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002418 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002419 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002420 } else {
2421 if (self->root) {
2422 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002423 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002424 "multiple elements on top level"
2425 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002426 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002427 }
2428 Py_INCREF(node);
2429 self->root = node;
2430 }
2431
2432 if (self->index < PyList_GET_SIZE(self->stack)) {
2433 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002434 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002435 Py_INCREF(this);
2436 } else {
2437 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002438 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002439 }
2440 self->index++;
2441
2442 Py_DECREF(this);
2443 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002444 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002445
2446 Py_DECREF(self->last);
2447 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002448 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002449
2450 if (self->start_event_obj) {
2451 PyObject* res;
2452 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002453 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002454 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002455 PyList_Append(self->events, res);
2456 Py_DECREF(res);
2457 } else
2458 PyErr_Clear(); /* FIXME: propagate error */
2459 }
2460
2461 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002462
2463 error:
2464 Py_DECREF(node);
2465 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002466}
2467
2468LOCAL(PyObject*)
2469treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2470{
2471 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002472 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002473 /* ignore calls to data before the first call to start */
2474 Py_RETURN_NONE;
2475 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002476 /* store the first item as is */
2477 Py_INCREF(data); self->data = data;
2478 } else {
2479 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002480 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2481 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002482 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002483 /* expat often generates single character data sections; handle
2484 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002485 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2486 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002487 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002488 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002489 } else if (PyList_CheckExact(self->data)) {
2490 if (PyList_Append(self->data, data) < 0)
2491 return NULL;
2492 } else {
2493 PyObject* list = PyList_New(2);
2494 if (!list)
2495 return NULL;
2496 PyList_SET_ITEM(list, 0, self->data);
2497 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2498 self->data = list;
2499 }
2500 }
2501
2502 Py_RETURN_NONE;
2503}
2504
2505LOCAL(PyObject*)
2506treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2507{
2508 PyObject* item;
2509
2510 if (self->data) {
2511 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002512 if (treebuilder_set_element_text(self->last, self->data))
2513 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002514 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002515 if (treebuilder_set_element_tail(self->last, self->data))
2516 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002517 }
2518 self->data = NULL;
2519 }
2520
2521 if (self->index == 0) {
2522 PyErr_SetString(
2523 PyExc_IndexError,
2524 "pop from empty stack"
2525 );
2526 return NULL;
2527 }
2528
2529 self->index--;
2530
2531 item = PyList_GET_ITEM(self->stack, self->index);
2532 Py_INCREF(item);
2533
2534 Py_DECREF(self->last);
2535
Antoine Pitrouee329312012-10-04 19:53:29 +02002536 self->last = self->this;
2537 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002538
2539 if (self->end_event_obj) {
2540 PyObject* res;
2541 PyObject* action = self->end_event_obj;
2542 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002543 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002545 PyList_Append(self->events, res);
2546 Py_DECREF(res);
2547 } else
2548 PyErr_Clear(); /* FIXME: propagate error */
2549 }
2550
2551 Py_INCREF(self->last);
2552 return (PyObject*) self->last;
2553}
2554
2555LOCAL(void)
2556treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002557 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002558{
2559 PyObject* res;
2560 PyObject* action;
2561 PyObject* parcel;
2562
2563 if (!self->events)
2564 return;
2565
2566 if (start) {
2567 if (!self->start_ns_event_obj)
2568 return;
2569 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002570 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002571 if (!parcel)
2572 return;
2573 Py_INCREF(action);
2574 } else {
2575 if (!self->end_ns_event_obj)
2576 return;
2577 action = self->end_ns_event_obj;
2578 Py_INCREF(action);
2579 parcel = Py_None;
2580 Py_INCREF(parcel);
2581 }
2582
2583 res = PyTuple_New(2);
2584
2585 if (res) {
2586 PyTuple_SET_ITEM(res, 0, action);
2587 PyTuple_SET_ITEM(res, 1, parcel);
2588 PyList_Append(self->events, res);
2589 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002590 }
2591 else {
2592 Py_DECREF(action);
2593 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002594 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002595 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002596}
2597
2598/* -------------------------------------------------------------------- */
2599/* methods (in alphabetical order) */
2600
2601static PyObject*
2602treebuilder_data(TreeBuilderObject* self, PyObject* args)
2603{
2604 PyObject* data;
2605 if (!PyArg_ParseTuple(args, "O:data", &data))
2606 return NULL;
2607
2608 return treebuilder_handle_data(self, data);
2609}
2610
2611static PyObject*
2612treebuilder_end(TreeBuilderObject* self, PyObject* args)
2613{
2614 PyObject* tag;
2615 if (!PyArg_ParseTuple(args, "O:end", &tag))
2616 return NULL;
2617
2618 return treebuilder_handle_end(self, tag);
2619}
2620
2621LOCAL(PyObject*)
2622treebuilder_done(TreeBuilderObject* self)
2623{
2624 PyObject* res;
2625
2626 /* FIXME: check stack size? */
2627
2628 if (self->root)
2629 res = self->root;
2630 else
2631 res = Py_None;
2632
2633 Py_INCREF(res);
2634 return res;
2635}
2636
2637static PyObject*
2638treebuilder_close(TreeBuilderObject* self, PyObject* args)
2639{
2640 if (!PyArg_ParseTuple(args, ":close"))
2641 return NULL;
2642
2643 return treebuilder_done(self);
2644}
2645
2646static PyObject*
2647treebuilder_start(TreeBuilderObject* self, PyObject* args)
2648{
2649 PyObject* tag;
2650 PyObject* attrib = Py_None;
2651 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2652 return NULL;
2653
2654 return treebuilder_handle_start(self, tag, attrib);
2655}
2656
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002657static PyMethodDef treebuilder_methods[] = {
2658 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2659 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2660 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002661 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2662 {NULL, NULL}
2663};
2664
Neal Norwitz227b5332006-03-22 09:28:35 +00002665static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002666 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002667 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002668 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002669 (destructor)treebuilder_dealloc, /* tp_dealloc */
2670 0, /* tp_print */
2671 0, /* tp_getattr */
2672 0, /* tp_setattr */
2673 0, /* tp_reserved */
2674 0, /* tp_repr */
2675 0, /* tp_as_number */
2676 0, /* tp_as_sequence */
2677 0, /* tp_as_mapping */
2678 0, /* tp_hash */
2679 0, /* tp_call */
2680 0, /* tp_str */
2681 0, /* tp_getattro */
2682 0, /* tp_setattro */
2683 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002684 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2685 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002686 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002687 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2688 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002689 0, /* tp_richcompare */
2690 0, /* tp_weaklistoffset */
2691 0, /* tp_iter */
2692 0, /* tp_iternext */
2693 treebuilder_methods, /* tp_methods */
2694 0, /* tp_members */
2695 0, /* tp_getset */
2696 0, /* tp_base */
2697 0, /* tp_dict */
2698 0, /* tp_descr_get */
2699 0, /* tp_descr_set */
2700 0, /* tp_dictoffset */
2701 (initproc)treebuilder_init, /* tp_init */
2702 PyType_GenericAlloc, /* tp_alloc */
2703 treebuilder_new, /* tp_new */
2704 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002705};
2706
2707/* ==================================================================== */
2708/* the expat interface */
2709
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002712
2713/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2714 * cached globally without being in per-module state.
2715 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002716static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002717#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718
Eli Bendersky52467b12012-06-01 07:13:08 +03002719static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2720 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2721
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722typedef struct {
2723 PyObject_HEAD
2724
2725 XML_Parser parser;
2726
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002727 PyObject *target;
2728 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002729
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002730 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002732 PyObject *handle_start;
2733 PyObject *handle_data;
2734 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002735
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002736 PyObject *handle_comment;
2737 PyObject *handle_pi;
2738 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002740 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002741
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742} XMLParserObject;
2743
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002744#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2745
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746/* helpers */
2747
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002748LOCAL(PyObject*)
2749makeuniversal(XMLParserObject* self, const char* string)
2750{
2751 /* convert a UTF-8 tag/attribute name from the expat parser
2752 to a universal name string */
2753
Antoine Pitrouc1948842012-10-01 23:40:37 +02002754 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002755 PyObject* key;
2756 PyObject* value;
2757
2758 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002759 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002760 if (!key)
2761 return NULL;
2762
2763 value = PyDict_GetItem(self->names, key);
2764
2765 if (value) {
2766 Py_INCREF(value);
2767 } else {
2768 /* new name. convert to universal name, and decode as
2769 necessary */
2770
2771 PyObject* tag;
2772 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002773 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002774
2775 /* look for namespace separator */
2776 for (i = 0; i < size; i++)
2777 if (string[i] == '}')
2778 break;
2779 if (i != size) {
2780 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002781 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002782 if (tag == NULL) {
2783 Py_DECREF(key);
2784 return NULL;
2785 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002786 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002787 p[0] = '{';
2788 memcpy(p+1, string, size);
2789 size++;
2790 } else {
2791 /* plain name; use key as tag */
2792 Py_INCREF(key);
2793 tag = key;
2794 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002795
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002796 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002797 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002798 value = PyUnicode_DecodeUTF8(p, size, "strict");
2799 Py_DECREF(tag);
2800 if (!value) {
2801 Py_DECREF(key);
2802 return NULL;
2803 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002804
2805 /* add to names dictionary */
2806 if (PyDict_SetItem(self->names, key, value) < 0) {
2807 Py_DECREF(key);
2808 Py_DECREF(value);
2809 return NULL;
2810 }
2811 }
2812
2813 Py_DECREF(key);
2814 return value;
2815}
2816
Eli Bendersky5b77d812012-03-16 08:20:05 +02002817/* Set the ParseError exception with the given parameters.
2818 * If message is not NULL, it's used as the error string. Otherwise, the
2819 * message string is the default for the given error_code.
2820*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002821static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002822expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002823{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002824 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002825 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002826
Victor Stinner499dfcf2011-03-21 13:26:24 +01002827 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002828 message ? message : EXPAT(ErrorString)(error_code),
2829 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002830 if (errmsg == NULL)
2831 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002832
Eli Bendersky532d03e2013-08-10 08:00:39 -07002833 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002834 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002835 if (!error)
2836 return;
2837
Eli Bendersky5b77d812012-03-16 08:20:05 +02002838 /* Add code and position attributes */
2839 code = PyLong_FromLong((long)error_code);
2840 if (!code) {
2841 Py_DECREF(error);
2842 return;
2843 }
2844 if (PyObject_SetAttrString(error, "code", code) == -1) {
2845 Py_DECREF(error);
2846 Py_DECREF(code);
2847 return;
2848 }
2849 Py_DECREF(code);
2850
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002851 position = Py_BuildValue("(ii)", line, column);
2852 if (!position) {
2853 Py_DECREF(error);
2854 return;
2855 }
2856 if (PyObject_SetAttrString(error, "position", position) == -1) {
2857 Py_DECREF(error);
2858 Py_DECREF(position);
2859 return;
2860 }
2861 Py_DECREF(position);
2862
Eli Bendersky532d03e2013-08-10 08:00:39 -07002863 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002864 Py_DECREF(error);
2865}
2866
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002867/* -------------------------------------------------------------------- */
2868/* handlers */
2869
2870static void
2871expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2872 int data_len)
2873{
2874 PyObject* key;
2875 PyObject* value;
2876 PyObject* res;
2877
2878 if (data_len < 2 || data_in[0] != '&')
2879 return;
2880
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002881 if (PyErr_Occurred())
2882 return;
2883
Neal Norwitz0269b912007-08-08 06:56:02 +00002884 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002885 if (!key)
2886 return;
2887
2888 value = PyDict_GetItem(self->entity, key);
2889
2890 if (value) {
2891 if (TreeBuilder_CheckExact(self->target))
2892 res = treebuilder_handle_data(
2893 (TreeBuilderObject*) self->target, value
2894 );
2895 else if (self->handle_data)
2896 res = PyObject_CallFunction(self->handle_data, "O", value);
2897 else
2898 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002899 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002900 } else if (!PyErr_Occurred()) {
2901 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002902 char message[128] = "undefined entity ";
2903 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002904 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002905 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002906 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002907 EXPAT(GetErrorColumnNumber)(self->parser),
2908 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002909 );
2910 }
2911
2912 Py_DECREF(key);
2913}
2914
2915static void
2916expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2917 const XML_Char **attrib_in)
2918{
2919 PyObject* res;
2920 PyObject* tag;
2921 PyObject* attrib;
2922 int ok;
2923
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002924 if (PyErr_Occurred())
2925 return;
2926
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002927 /* tag name */
2928 tag = makeuniversal(self, tag_in);
2929 if (!tag)
2930 return; /* parser will look for errors */
2931
2932 /* attributes */
2933 if (attrib_in[0]) {
2934 attrib = PyDict_New();
2935 if (!attrib)
2936 return;
2937 while (attrib_in[0] && attrib_in[1]) {
2938 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002939 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002940 if (!key || !value) {
2941 Py_XDECREF(value);
2942 Py_XDECREF(key);
2943 Py_DECREF(attrib);
2944 return;
2945 }
2946 ok = PyDict_SetItem(attrib, key, value);
2947 Py_DECREF(value);
2948 Py_DECREF(key);
2949 if (ok < 0) {
2950 Py_DECREF(attrib);
2951 return;
2952 }
2953 attrib_in += 2;
2954 }
2955 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002956 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002957 attrib = PyDict_New();
2958 if (!attrib)
2959 return;
2960 }
2961
2962 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002963 /* shortcut */
2964 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2965 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002966 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002967 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002968 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002969 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970 res = NULL;
2971
2972 Py_DECREF(tag);
2973 Py_DECREF(attrib);
2974
2975 Py_XDECREF(res);
2976}
2977
2978static void
2979expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2980 int data_len)
2981{
2982 PyObject* data;
2983 PyObject* res;
2984
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002985 if (PyErr_Occurred())
2986 return;
2987
Neal Norwitz0269b912007-08-08 06:56:02 +00002988 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002989 if (!data)
2990 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002991
2992 if (TreeBuilder_CheckExact(self->target))
2993 /* shortcut */
2994 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2995 else if (self->handle_data)
2996 res = PyObject_CallFunction(self->handle_data, "O", data);
2997 else
2998 res = NULL;
2999
3000 Py_DECREF(data);
3001
3002 Py_XDECREF(res);
3003}
3004
3005static void
3006expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3007{
3008 PyObject* tag;
3009 PyObject* res = NULL;
3010
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003011 if (PyErr_Occurred())
3012 return;
3013
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003014 if (TreeBuilder_CheckExact(self->target))
3015 /* shortcut */
3016 /* the standard tree builder doesn't look at the end tag */
3017 res = treebuilder_handle_end(
3018 (TreeBuilderObject*) self->target, Py_None
3019 );
3020 else if (self->handle_end) {
3021 tag = makeuniversal(self, tag_in);
3022 if (tag) {
3023 res = PyObject_CallFunction(self->handle_end, "O", tag);
3024 Py_DECREF(tag);
3025 }
3026 }
3027
3028 Py_XDECREF(res);
3029}
3030
3031static void
3032expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3033 const XML_Char *uri)
3034{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003035 PyObject* sprefix = NULL;
3036 PyObject* suri = NULL;
3037
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003038 if (PyErr_Occurred())
3039 return;
3040
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003041 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
3042 if (!suri)
3043 return;
3044
3045 if (prefix)
3046 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3047 else
3048 sprefix = PyUnicode_FromString("");
3049 if (!sprefix) {
3050 Py_DECREF(suri);
3051 return;
3052 }
3053
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003054 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003055 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003056 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003057
3058 Py_DECREF(sprefix);
3059 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003060}
3061
3062static void
3063expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3064{
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003065 if (PyErr_Occurred())
3066 return;
3067
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003068 treebuilder_handle_namespace(
3069 (TreeBuilderObject*) self->target, 0, NULL, NULL
3070 );
3071}
3072
3073static void
3074expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3075{
3076 PyObject* comment;
3077 PyObject* res;
3078
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003079 if (PyErr_Occurred())
3080 return;
3081
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003083 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003084 if (comment) {
3085 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3086 Py_XDECREF(res);
3087 Py_DECREF(comment);
3088 }
3089 }
3090}
3091
Eli Bendersky45839902013-01-13 05:14:47 -08003092static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003093expat_start_doctype_handler(XMLParserObject *self,
3094 const XML_Char *doctype_name,
3095 const XML_Char *sysid,
3096 const XML_Char *pubid,
3097 int has_internal_subset)
3098{
3099 PyObject *self_pyobj = (PyObject *)self;
3100 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3101 PyObject *parser_doctype = NULL;
3102 PyObject *res = NULL;
3103
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003104 if (PyErr_Occurred())
3105 return;
3106
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003107 doctype_name_obj = makeuniversal(self, doctype_name);
3108 if (!doctype_name_obj)
3109 return;
3110
3111 if (sysid) {
3112 sysid_obj = makeuniversal(self, sysid);
3113 if (!sysid_obj) {
3114 Py_DECREF(doctype_name_obj);
3115 return;
3116 }
3117 } else {
3118 Py_INCREF(Py_None);
3119 sysid_obj = Py_None;
3120 }
3121
3122 if (pubid) {
3123 pubid_obj = makeuniversal(self, pubid);
3124 if (!pubid_obj) {
3125 Py_DECREF(doctype_name_obj);
3126 Py_DECREF(sysid_obj);
3127 return;
3128 }
3129 } else {
3130 Py_INCREF(Py_None);
3131 pubid_obj = Py_None;
3132 }
3133
3134 /* If the target has a handler for doctype, call it. */
3135 if (self->handle_doctype) {
3136 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3137 doctype_name_obj, pubid_obj, sysid_obj);
3138 Py_CLEAR(res);
3139 }
3140
3141 /* Now see if the parser itself has a doctype method. If yes and it's
3142 * a subclass, call it but warn about deprecation. If it's not a subclass
3143 * (i.e. vanilla XMLParser), do nothing.
3144 */
3145 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3146 if (parser_doctype) {
3147 if (!XMLParser_CheckExact(self_pyobj)) {
3148 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3149 "This method of XMLParser is deprecated. Define"
3150 " doctype() method on the TreeBuilder target.",
3151 1) < 0) {
3152 goto clear;
3153 }
3154 res = PyObject_CallFunction(parser_doctype, "OOO",
3155 doctype_name_obj, pubid_obj, sysid_obj);
3156 Py_CLEAR(res);
3157 }
3158 }
3159
3160clear:
3161 Py_XDECREF(parser_doctype);
3162 Py_DECREF(doctype_name_obj);
3163 Py_DECREF(pubid_obj);
3164 Py_DECREF(sysid_obj);
3165}
3166
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003167static void
3168expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3169 const XML_Char* data_in)
3170{
3171 PyObject* target;
3172 PyObject* data;
3173 PyObject* res;
3174
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003175 if (PyErr_Occurred())
3176 return;
3177
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003178 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003179 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3180 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003181 if (target && data) {
3182 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3183 Py_XDECREF(res);
3184 Py_DECREF(data);
3185 Py_DECREF(target);
3186 } else {
3187 Py_XDECREF(data);
3188 Py_XDECREF(target);
3189 }
3190 }
3191}
3192
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003193/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003194
Eli Bendersky52467b12012-06-01 07:13:08 +03003195static PyObject *
3196xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003197{
Eli Bendersky52467b12012-06-01 07:13:08 +03003198 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3199 if (self) {
3200 self->parser = NULL;
3201 self->target = self->entity = self->names = NULL;
3202 self->handle_start = self->handle_data = self->handle_end = NULL;
3203 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003204 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003205 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003206 return (PyObject *)self;
3207}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003208
Eli Bendersky52467b12012-06-01 07:13:08 +03003209static int
3210xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3211{
3212 XMLParserObject *self_xp = (XMLParserObject *)self;
3213 PyObject *target = NULL, *html = NULL;
3214 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003215 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003216
Eli Bendersky52467b12012-06-01 07:13:08 +03003217 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3218 &html, &target, &encoding)) {
3219 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003220 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003221
Eli Bendersky52467b12012-06-01 07:13:08 +03003222 self_xp->entity = PyDict_New();
3223 if (!self_xp->entity)
3224 return -1;
3225
3226 self_xp->names = PyDict_New();
3227 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003228 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003229 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003230 }
3231
Eli Bendersky52467b12012-06-01 07:13:08 +03003232 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3233 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003234 Py_CLEAR(self_xp->entity);
3235 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003236 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003237 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003238 }
3239
Eli Bendersky52467b12012-06-01 07:13:08 +03003240 if (target) {
3241 Py_INCREF(target);
3242 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003243 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003244 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003245 Py_CLEAR(self_xp->entity);
3246 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003247 EXPAT(ParserFree)(self_xp->parser);
3248 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003249 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003250 }
3251 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003252
Eli Bendersky52467b12012-06-01 07:13:08 +03003253 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3254 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3255 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3256 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3257 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3258 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003259 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003260
3261 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003262
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003263 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003264 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003265 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003266 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003267 (XML_StartElementHandler) expat_start_handler,
3268 (XML_EndElementHandler) expat_end_handler
3269 );
3270 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003271 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003272 (XML_DefaultHandler) expat_default_handler
3273 );
3274 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003275 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276 (XML_CharacterDataHandler) expat_data_handler
3277 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003278 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003279 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003280 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003281 (XML_CommentHandler) expat_comment_handler
3282 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003283 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003285 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286 (XML_ProcessingInstructionHandler) expat_pi_handler
3287 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003288 EXPAT(SetStartDoctypeDeclHandler)(
3289 self_xp->parser,
3290 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3291 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003292 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003293 self_xp->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003294 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003295 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003296
Eli Bendersky52467b12012-06-01 07:13:08 +03003297 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003298}
3299
Eli Bendersky52467b12012-06-01 07:13:08 +03003300static int
3301xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3302{
3303 Py_VISIT(self->handle_close);
3304 Py_VISIT(self->handle_pi);
3305 Py_VISIT(self->handle_comment);
3306 Py_VISIT(self->handle_end);
3307 Py_VISIT(self->handle_data);
3308 Py_VISIT(self->handle_start);
3309
3310 Py_VISIT(self->target);
3311 Py_VISIT(self->entity);
3312 Py_VISIT(self->names);
3313
3314 return 0;
3315}
3316
3317static int
3318xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003319{
3320 EXPAT(ParserFree)(self->parser);
3321
Antoine Pitrouc1948842012-10-01 23:40:37 +02003322 Py_CLEAR(self->handle_close);
3323 Py_CLEAR(self->handle_pi);
3324 Py_CLEAR(self->handle_comment);
3325 Py_CLEAR(self->handle_end);
3326 Py_CLEAR(self->handle_data);
3327 Py_CLEAR(self->handle_start);
3328 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003329
Antoine Pitrouc1948842012-10-01 23:40:37 +02003330 Py_CLEAR(self->target);
3331 Py_CLEAR(self->entity);
3332 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003333
Eli Bendersky52467b12012-06-01 07:13:08 +03003334 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003335}
3336
Eli Bendersky52467b12012-06-01 07:13:08 +03003337static void
3338xmlparser_dealloc(XMLParserObject* self)
3339{
3340 PyObject_GC_UnTrack(self);
3341 xmlparser_gc_clear(self);
3342 Py_TYPE(self)->tp_free((PyObject *)self);
3343}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003344
3345LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003346expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003347{
3348 int ok;
3349
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003350 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003351 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3352
3353 if (PyErr_Occurred())
3354 return NULL;
3355
3356 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003357 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003358 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003359 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003360 EXPAT(GetErrorColumnNumber)(self->parser),
3361 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003362 );
3363 return NULL;
3364 }
3365
3366 Py_RETURN_NONE;
3367}
3368
3369static PyObject*
3370xmlparser_close(XMLParserObject* self, PyObject* args)
3371{
3372 /* end feeding data to parser */
3373
3374 PyObject* res;
3375 if (!PyArg_ParseTuple(args, ":close"))
3376 return NULL;
3377
3378 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003379 if (!res)
3380 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003381
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003382 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003383 Py_DECREF(res);
3384 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003385 }
3386 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003387 Py_DECREF(res);
3388 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003389 }
3390 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003391 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003392 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003393}
3394
3395static PyObject*
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003396xmlparser_feed(XMLParserObject* self, PyObject* arg)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003397{
3398 /* feed data to parser */
3399
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003400 if (PyUnicode_Check(arg)) {
3401 Py_ssize_t data_len;
3402 const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len);
3403 if (data == NULL)
3404 return NULL;
3405 if (data_len > INT_MAX) {
3406 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3407 return NULL;
3408 }
3409 /* Explicitly set UTF-8 encoding. Return code ignored. */
3410 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3411 return expat_parse(self, data, (int)data_len, 0);
3412 }
3413 else {
3414 Py_buffer view;
3415 PyObject *res;
3416 if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0)
3417 return NULL;
3418 if (view.len > INT_MAX) {
3419 PyBuffer_Release(&view);
3420 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3421 return NULL;
3422 }
3423 res = expat_parse(self, view.buf, (int)view.len, 0);
3424 PyBuffer_Release(&view);
3425 return res;
3426 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003427}
3428
3429static PyObject*
Eli Benderskya3699232013-05-19 18:47:23 -07003430xmlparser_parse_whole(XMLParserObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003431{
Eli Benderskya3699232013-05-19 18:47:23 -07003432 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003433 PyObject* reader;
3434 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003435 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003436 PyObject* res;
3437
3438 PyObject* fileobj;
3439 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3440 return NULL;
3441
3442 reader = PyObject_GetAttrString(fileobj, "read");
3443 if (!reader)
3444 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003445
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003446 /* read from open file object */
3447 for (;;) {
3448
3449 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3450
3451 if (!buffer) {
3452 /* read failed (e.g. due to KeyboardInterrupt) */
3453 Py_DECREF(reader);
3454 return NULL;
3455 }
3456
Eli Benderskyf996e772012-03-16 05:53:30 +02003457 if (PyUnicode_CheckExact(buffer)) {
3458 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003459 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003460 Py_DECREF(buffer);
3461 break;
3462 }
3463 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003464 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003465 if (!temp) {
3466 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003467 Py_DECREF(reader);
3468 return NULL;
3469 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003470 buffer = temp;
3471 }
3472 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003473 Py_DECREF(buffer);
3474 break;
3475 }
3476
3477 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003478 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003479 );
3480
3481 Py_DECREF(buffer);
3482
3483 if (!res) {
3484 Py_DECREF(reader);
3485 return NULL;
3486 }
3487 Py_DECREF(res);
3488
3489 }
3490
3491 Py_DECREF(reader);
3492
3493 res = expat_parse(self, "", 0, 1);
3494
3495 if (res && TreeBuilder_CheckExact(self->target)) {
3496 Py_DECREF(res);
3497 return treebuilder_done((TreeBuilderObject*) self->target);
3498 }
3499
3500 return res;
3501}
3502
3503static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003504xmlparser_doctype(XMLParserObject *self, PyObject *args)
3505{
3506 Py_RETURN_NONE;
3507}
3508
3509static PyObject*
3510xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003511{
3512 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003513 Py_ssize_t i, seqlen;
3514 TreeBuilderObject *target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003515
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003516 PyObject *events_queue;
3517 PyObject *events_to_report = Py_None;
3518 PyObject *events_seq;
3519 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events_queue,
3520 &events_to_report))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003521 return NULL;
3522
3523 if (!TreeBuilder_CheckExact(self->target)) {
3524 PyErr_SetString(
3525 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003526 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003527 "targets"
3528 );
3529 return NULL;
3530 }
3531
3532 target = (TreeBuilderObject*) self->target;
3533
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003534 Py_INCREF(events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003535 Py_XDECREF(target->events);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003536 target->events = events_queue;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003537
3538 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003539 Py_CLEAR(target->start_event_obj);
3540 Py_CLEAR(target->end_event_obj);
3541 Py_CLEAR(target->start_ns_event_obj);
3542 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003543
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003544 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003545 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003546 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003547 Py_RETURN_NONE;
3548 }
3549
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003550 if (!(events_seq = PySequence_Fast(events_to_report,
3551 "events must be a sequence"))) {
3552 return NULL;
3553 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003554
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003555 seqlen = PySequence_Size(events_seq);
3556 for (i = 0; i < seqlen; ++i) {
3557 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3558 char *event_name = NULL;
3559 if (PyUnicode_Check(event_name_obj)) {
3560 event_name = _PyUnicode_AsString(event_name_obj);
3561 } else if (PyBytes_Check(event_name_obj)) {
3562 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003563 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003564
3565 if (event_name == NULL) {
3566 Py_DECREF(events_seq);
3567 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3568 return NULL;
3569 } else if (strcmp(event_name, "start") == 0) {
3570 Py_INCREF(event_name_obj);
3571 target->start_event_obj = event_name_obj;
3572 } else if (strcmp(event_name, "end") == 0) {
3573 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003574 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003575 target->end_event_obj = event_name_obj;
3576 } else if (strcmp(event_name, "start-ns") == 0) {
3577 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003578 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003579 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003580 EXPAT(SetNamespaceDeclHandler)(
3581 self->parser,
3582 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3583 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3584 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003585 } else if (strcmp(event_name, "end-ns") == 0) {
3586 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003587 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003588 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003589 EXPAT(SetNamespaceDeclHandler)(
3590 self->parser,
3591 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3592 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3593 );
3594 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003595 Py_DECREF(events_seq);
3596 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003597 return NULL;
3598 }
3599 }
3600
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003601 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003602 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003603}
3604
3605static PyMethodDef xmlparser_methods[] = {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003606 {"feed", (PyCFunction) xmlparser_feed, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003607 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
Eli Benderskya3699232013-05-19 18:47:23 -07003608 {"_parse_whole", (PyCFunction) xmlparser_parse_whole, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003609 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003610 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003611 {NULL, NULL}
3612};
3613
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003614static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003615xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003616{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003617 if (PyUnicode_Check(nameobj)) {
3618 PyObject* res;
3619 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3620 res = self->entity;
3621 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3622 res = self->target;
3623 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3624 return PyUnicode_FromFormat(
3625 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003626 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003627 }
3628 else
3629 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003630
Alexander Belopolskye239d232010-12-08 23:31:48 +00003631 Py_INCREF(res);
3632 return res;
3633 }
3634 generic:
3635 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003636}
3637
Neal Norwitz227b5332006-03-22 09:28:35 +00003638static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003639 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003640 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003641 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003642 (destructor)xmlparser_dealloc, /* tp_dealloc */
3643 0, /* tp_print */
3644 0, /* tp_getattr */
3645 0, /* tp_setattr */
3646 0, /* tp_reserved */
3647 0, /* tp_repr */
3648 0, /* tp_as_number */
3649 0, /* tp_as_sequence */
3650 0, /* tp_as_mapping */
3651 0, /* tp_hash */
3652 0, /* tp_call */
3653 0, /* tp_str */
3654 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3655 0, /* tp_setattro */
3656 0, /* tp_as_buffer */
3657 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3658 /* tp_flags */
3659 0, /* tp_doc */
3660 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3661 (inquiry)xmlparser_gc_clear, /* tp_clear */
3662 0, /* tp_richcompare */
3663 0, /* tp_weaklistoffset */
3664 0, /* tp_iter */
3665 0, /* tp_iternext */
3666 xmlparser_methods, /* tp_methods */
3667 0, /* tp_members */
3668 0, /* tp_getset */
3669 0, /* tp_base */
3670 0, /* tp_dict */
3671 0, /* tp_descr_get */
3672 0, /* tp_descr_set */
3673 0, /* tp_dictoffset */
3674 (initproc)xmlparser_init, /* tp_init */
3675 PyType_GenericAlloc, /* tp_alloc */
3676 xmlparser_new, /* tp_new */
3677 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003678};
3679
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003680/* ==================================================================== */
3681/* python module interface */
3682
3683static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003684 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003685 {NULL, NULL}
3686};
3687
Martin v. Löwis1a214512008-06-11 05:26:20 +00003688
Eli Bendersky532d03e2013-08-10 08:00:39 -07003689static struct PyModuleDef elementtreemodule = {
3690 PyModuleDef_HEAD_INIT,
3691 "_elementtree",
3692 NULL,
3693 sizeof(elementtreestate),
3694 _functions,
3695 NULL,
3696 elementtree_traverse,
3697 elementtree_clear,
3698 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003699};
3700
Neal Norwitzf6657e62006-12-28 04:47:50 +00003701PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003702PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003703{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003704 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003705 elementtreestate *st;
3706
3707 m = PyState_FindModule(&elementtreemodule);
3708 if (m) {
3709 Py_INCREF(m);
3710 return m;
3711 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003712
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003713 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003714 if (PyType_Ready(&ElementIter_Type) < 0)
3715 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003716 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003717 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003718 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003719 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003720 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003721 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003722
Eli Bendersky532d03e2013-08-10 08:00:39 -07003723 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003724 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003725 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003726 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003727
Eli Bendersky828efde2012-04-05 05:40:58 +03003728 if (!(temp = PyImport_ImportModule("copy")))
3729 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003730 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003731 Py_XDECREF(temp);
3732
Eli Bendersky532d03e2013-08-10 08:00:39 -07003733 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003734 return NULL;
3735
Eli Bendersky20d41742012-06-01 09:48:37 +03003736 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003737 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3738 if (expat_capi) {
3739 /* check that it's usable */
3740 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3741 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3742 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3743 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003744 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003745 PyErr_SetString(PyExc_ImportError,
3746 "pyexpat version is incompatible");
3747 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003748 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003749 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003750 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003751 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003752
Eli Bendersky532d03e2013-08-10 08:00:39 -07003753 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003754 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003755 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003756 Py_INCREF(st->parseerror_obj);
3757 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003758
Eli Bendersky092af1f2012-03-04 07:14:03 +02003759 Py_INCREF((PyObject *)&Element_Type);
3760 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3761
Eli Bendersky58d548d2012-05-29 15:45:16 +03003762 Py_INCREF((PyObject *)&TreeBuilder_Type);
3763 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3764
Eli Bendersky52467b12012-06-01 07:13:08 +03003765 Py_INCREF((PyObject *)&XMLParser_Type);
3766 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003767
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003768 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003769}