/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
14#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030015#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000017/* -------------------------------------------------------------------- */
18/* configuration */
19
/* Number of child slots stored inline in an element's extra section;
   elements with no more children than this need no separately
   allocated child buffer. */
#define STATIC_CHILDREN 4
23
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
33
34/* -------------------------------------------------------------------- */
35
/* Allocation tracing hooks.  The default variants expand to nothing;
   switch the condition below to 0 to print a running byte total on
   every ALLOC/RELEASE instead. */
#if 1
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#else
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#endif
46
/* compiler tweaks: LOCAL(type) introduces a file-private definition of
   the given type; MSVC builds additionally ask for inlining and the
   fastcall convention. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
53
/* The lowest bit of the text and tail pointers carries a 'join' flag, so
   every use of those fields as object pointers must go through JOIN_OBJ.
   See the comments in the ElementObject definition for more info.

   JOIN_OBJ(p)        p with the flag bit stripped, as a PyObject*
   JOIN_GET(p)        the flag bit stored in p (0 or 1)
   JOIN_SET(p, flag)  the object pointer of p combined with `flag` */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061
Eli Benderskydd3661e2013-09-13 06:24:25 -070062/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
63 * reference since this function sets it to NULL.
64*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020065static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070066{
67 if (*p) {
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = NULL;
70 Py_DECREF(tmp);
71 }
72}
73
Ronald Oussoren138d0802013-07-19 11:11:25 +020074/* Types defined by this extension */
75static PyTypeObject Element_Type;
76static PyTypeObject ElementIter_Type;
77static PyTypeObject TreeBuilder_Type;
78static PyTypeObject XMLParser_Type;
79
80
Eli Bendersky532d03e2013-08-10 08:00:39 -070081/* Per-module state; PEP 3121 */
82typedef struct {
83 PyObject *parseerror_obj;
84 PyObject *deepcopy_obj;
85 PyObject *elementpath_obj;
86} elementtreestate;
87
88static struct PyModuleDef elementtreemodule;
89
90/* Given a module object (assumed to be _elementtree), get its per-module
91 * state.
92 */
93#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
94
95/* Find the module instance imported in the currently running sub-interpreter
96 * and get its state.
97 */
98#define ET_STATE_GLOBAL \
99 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
100
101static int
102elementtree_clear(PyObject *m)
103{
104 elementtreestate *st = ET_STATE(m);
105 Py_CLEAR(st->parseerror_obj);
106 Py_CLEAR(st->deepcopy_obj);
107 Py_CLEAR(st->elementpath_obj);
108 return 0;
109}
110
111static int
112elementtree_traverse(PyObject *m, visitproc visit, void *arg)
113{
114 elementtreestate *st = ET_STATE(m);
115 Py_VISIT(st->parseerror_obj);
116 Py_VISIT(st->deepcopy_obj);
117 Py_VISIT(st->elementpath_obj);
118 return 0;
119}
120
121static void
122elementtree_free(void *m)
123{
124 elementtree_clear((PyObject *)m);
125}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126
127/* helpers */
128
129LOCAL(PyObject*)
130deepcopy(PyObject* object, PyObject* memo)
131{
132 /* do a deep copy of the given object */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000133 PyObject* args;
134 PyObject* result;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700135 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136
Eli Bendersky532d03e2013-08-10 08:00:39 -0700137 if (!st->deepcopy_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138 PyErr_SetString(
139 PyExc_RuntimeError,
140 "deepcopy helper not found"
141 );
142 return NULL;
143 }
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000146 if (!args)
147 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700148 result = PyObject_CallObject(st->deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
153LOCAL(PyObject*)
154list_join(PyObject* list)
155{
156 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000158 PyObject* result;
159
Antoine Pitrouc1948842012-10-01 23:40:37 +0200160 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000161 if (!joiner)
162 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200163 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200165 if (result)
166 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000167 return result;
168}
169
Eli Bendersky48d358b2012-05-30 17:57:50 +0300170/* Is the given object an empty dictionary?
171*/
172static int
173is_empty_dict(PyObject *obj)
174{
175 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
176}
177
178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200180/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181
182typedef struct {
183
184 /* attributes (a dictionary object), or None if no attributes */
185 PyObject* attrib;
186
187 /* child elements */
188 int length; /* actual number of items */
189 int allocated; /* allocated items */
190
191 /* this either points to _children or to a malloced buffer */
192 PyObject* *children;
193
194 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100195
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000196} ElementObjectExtra;
197
198typedef struct {
199 PyObject_HEAD
200
201 /* element tag (a string). */
202 PyObject* tag;
203
204 /* text before first child. note that this is a tagged pointer;
205 use JOIN_OBJ to get the object pointer. the join flag is used
206 to distinguish lists created by the tree builder from lists
207 assigned to the attribute by application code; the former
208 should be joined before being returned to the user, the latter
209 should be left intact. */
210 PyObject* text;
211
212 /* text after this element, in parent. note that this is a tagged
213 pointer; use JOIN_OBJ to get the object pointer. */
214 PyObject* tail;
215
216 ElementObjectExtra* extra;
217
Eli Benderskyebf37a22012-04-03 22:02:37 +0300218 PyObject *weakreflist; /* For tp_weaklistoffset */
219
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000220} ElementObject;
221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222
Christian Heimes90aa7642007-12-19 02:45:37 +0000223#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
225/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200226/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227
228LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
231 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200232 if (!self->extra) {
233 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000234 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200235 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000236
237 if (!attrib)
238 attrib = Py_None;
239
240 Py_INCREF(attrib);
241 self->extra->attrib = attrib;
242
243 self->extra->length = 0;
244 self->extra->allocated = STATIC_CHILDREN;
245 self->extra->children = self->extra->_children;
246
247 return 0;
248}
249
250LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200251dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000252{
Eli Bendersky08b85292012-04-04 15:55:07 +0300253 ElementObjectExtra *myextra;
254 int i;
255
Eli Benderskyebf37a22012-04-03 22:02:37 +0300256 if (!self->extra)
257 return;
258
259 /* Avoid DECREFs calling into this code again (cycles, etc.)
260 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300261 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300262 self->extra = NULL;
263
264 Py_DECREF(myextra->attrib);
265
Eli Benderskyebf37a22012-04-03 22:02:37 +0300266 for (i = 0; i < myextra->length; i++)
267 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268
Eli Benderskyebf37a22012-04-03 22:02:37 +0300269 if (myextra->children != myextra->_children)
270 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271
Eli Benderskyebf37a22012-04-03 22:02:37 +0300272 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273}
274
Eli Bendersky092af1f2012-03-04 07:14:03 +0200275/* Convenience internal function to create new Element objects with the given
276 * tag and attributes.
277*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000278LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200279create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280{
281 ElementObject* self;
282
Eli Bendersky0192ba32012-03-30 16:38:33 +0300283 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000284 if (self == NULL)
285 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 self->extra = NULL;
287
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 Py_INCREF(tag);
289 self->tag = tag;
290
291 Py_INCREF(Py_None);
292 self->text = Py_None;
293
294 Py_INCREF(Py_None);
295 self->tail = Py_None;
296
Eli Benderskyebf37a22012-04-03 22:02:37 +0300297 self->weakreflist = NULL;
298
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200299 ALLOC(sizeof(ElementObject), "create element");
300 PyObject_GC_Track(self);
301
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200302 if (attrib != Py_None && !is_empty_dict(attrib)) {
303 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200304 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200305 return NULL;
306 }
307 }
308
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000309 return (PyObject*) self;
310}
311
Eli Bendersky092af1f2012-03-04 07:14:03 +0200312static PyObject *
313element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
314{
315 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
316 if (e != NULL) {
317 Py_INCREF(Py_None);
318 e->tag = Py_None;
319
320 Py_INCREF(Py_None);
321 e->text = Py_None;
322
323 Py_INCREF(Py_None);
324 e->tail = Py_None;
325
326 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300327 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200328 }
329 return (PyObject *)e;
330}
331
Eli Bendersky737b1732012-05-29 06:02:56 +0300332/* Helper function for extracting the attrib dictionary from a keywords dict.
333 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800334 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300335 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700336 *
337 * Return a dictionary with the content of kwds merged into the content of
338 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300339 */
340static PyObject*
341get_attrib_from_keywords(PyObject *kwds)
342{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700343 PyObject *attrib_str = PyUnicode_FromString("attrib");
344 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300345
346 if (attrib) {
347 /* If attrib was found in kwds, copy its value and remove it from
348 * kwds
349 */
350 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700351 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300352 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
353 Py_TYPE(attrib)->tp_name);
354 return NULL;
355 }
356 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700357 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300358 } else {
359 attrib = PyDict_New();
360 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700361
362 Py_DECREF(attrib_str);
363
364 /* attrib can be NULL if PyDict_New failed */
365 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200366 if (PyDict_Update(attrib, kwds) < 0)
367 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300368 return attrib;
369}
370
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371static int
372element_init(PyObject *self, PyObject *args, PyObject *kwds)
373{
374 PyObject *tag;
375 PyObject *tmp;
376 PyObject *attrib = NULL;
377 ElementObject *self_elem;
378
379 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
380 return -1;
381
Eli Bendersky737b1732012-05-29 06:02:56 +0300382 if (attrib) {
383 /* attrib passed as positional arg */
384 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200385 if (!attrib)
386 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300387 if (kwds) {
388 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200389 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300390 return -1;
391 }
392 }
393 } else if (kwds) {
394 /* have keywords args */
395 attrib = get_attrib_from_keywords(kwds);
396 if (!attrib)
397 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200398 }
399
400 self_elem = (ElementObject *)self;
401
Antoine Pitrouc1948842012-10-01 23:40:37 +0200402 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 return -1;
406 }
407 }
408
Eli Bendersky48d358b2012-05-30 17:57:50 +0300409 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200410 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200411
412 /* Replace the objects already pointed to by tag, text and tail. */
413 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200414 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200415 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416 Py_DECREF(tmp);
417
418 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200419 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200420 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 Py_DECREF(JOIN_OBJ(tmp));
422
423 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(JOIN_OBJ(tmp));
427
428 return 0;
429}
430
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000431LOCAL(int)
432element_resize(ElementObject* self, int extra)
433{
434 int size;
435 PyObject* *children;
436
437 /* make sure self->children can hold the given number of extra
438 elements. set an exception and return -1 if allocation failed */
439
Victor Stinner5f0af232013-07-11 23:01:36 +0200440 if (!self->extra) {
441 if (create_extra(self, NULL) < 0)
442 return -1;
443 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444
445 size = self->extra->length + extra;
446
447 if (size > self->extra->allocated) {
448 /* use Python 2.4's list growth strategy */
449 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000450 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100451 * which needs at least 4 bytes.
452 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000453 * be safe.
454 */
455 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000457 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100458 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000459 * false alarm always assume at least one child to be safe.
460 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000461 children = PyObject_Realloc(self->extra->children,
462 size * sizeof(PyObject*));
463 if (!children)
464 goto nomemory;
465 } else {
466 children = PyObject_Malloc(size * sizeof(PyObject*));
467 if (!children)
468 goto nomemory;
469 /* copy existing children from static area to malloc buffer */
470 memcpy(children, self->extra->children,
471 self->extra->length * sizeof(PyObject*));
472 }
473 self->extra->children = children;
474 self->extra->allocated = size;
475 }
476
477 return 0;
478
479 nomemory:
480 PyErr_NoMemory();
481 return -1;
482}
483
484LOCAL(int)
485element_add_subelement(ElementObject* self, PyObject* element)
486{
487 /* add a child element to a parent */
488
489 if (element_resize(self, 1) < 0)
490 return -1;
491
492 Py_INCREF(element);
493 self->extra->children[self->extra->length] = element;
494
495 self->extra->length++;
496
497 return 0;
498}
499
500LOCAL(PyObject*)
501element_get_attrib(ElementObject* self)
502{
503 /* return borrowed reference to attrib dictionary */
504 /* note: this function assumes that the extra section exists */
505
506 PyObject* res = self->extra->attrib;
507
508 if (res == Py_None) {
509 /* create missing dictionary */
510 res = PyDict_New();
511 if (!res)
512 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200513 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000514 self->extra->attrib = res;
515 }
516
517 return res;
518}
519
520LOCAL(PyObject*)
521element_get_text(ElementObject* self)
522{
523 /* return borrowed reference to text attribute */
524
525 PyObject* res = self->text;
526
527 if (JOIN_GET(res)) {
528 res = JOIN_OBJ(res);
529 if (PyList_CheckExact(res)) {
530 res = list_join(res);
531 if (!res)
532 return NULL;
533 self->text = res;
534 }
535 }
536
537 return res;
538}
539
540LOCAL(PyObject*)
541element_get_tail(ElementObject* self)
542{
543 /* return borrowed reference to text attribute */
544
545 PyObject* res = self->tail;
546
547 if (JOIN_GET(res)) {
548 res = JOIN_OBJ(res);
549 if (PyList_CheckExact(res)) {
550 res = list_join(res);
551 if (!res)
552 return NULL;
553 self->tail = res;
554 }
555 }
556
557 return res;
558}
559
560static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300561subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000562{
563 PyObject* elem;
564
565 ElementObject* parent;
566 PyObject* tag;
567 PyObject* attrib = NULL;
568 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
569 &Element_Type, &parent, &tag,
570 &PyDict_Type, &attrib))
571 return NULL;
572
Eli Bendersky737b1732012-05-29 06:02:56 +0300573 if (attrib) {
574 /* attrib passed as positional arg */
575 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000576 if (!attrib)
577 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300578 if (kwds) {
579 if (PyDict_Update(attrib, kwds) < 0) {
580 return NULL;
581 }
582 }
583 } else if (kwds) {
584 /* have keyword args */
585 attrib = get_attrib_from_keywords(kwds);
586 if (!attrib)
587 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000588 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300589 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590 Py_INCREF(Py_None);
591 attrib = Py_None;
592 }
593
Eli Bendersky092af1f2012-03-04 07:14:03 +0200594 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000595 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200596 if (elem == NULL)
597 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000598
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000599 if (element_add_subelement(parent, elem) < 0) {
600 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000601 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000602 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000603
604 return elem;
605}
606
Eli Bendersky0192ba32012-03-30 16:38:33 +0300607static int
608element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
609{
610 Py_VISIT(self->tag);
611 Py_VISIT(JOIN_OBJ(self->text));
612 Py_VISIT(JOIN_OBJ(self->tail));
613
614 if (self->extra) {
615 int i;
616 Py_VISIT(self->extra->attrib);
617
618 for (i = 0; i < self->extra->length; ++i)
619 Py_VISIT(self->extra->children[i]);
620 }
621 return 0;
622}
623
624static int
625element_gc_clear(ElementObject *self)
626{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300627 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700628 _clear_joined_ptr(&self->text);
629 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300630
631 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300632 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300633 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300634 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300635 return 0;
636}
637
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000638static void
639element_dealloc(ElementObject* self)
640{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300641 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300642
643 if (self->weakreflist != NULL)
644 PyObject_ClearWeakRefs((PyObject *) self);
645
Eli Bendersky0192ba32012-03-30 16:38:33 +0300646 /* element_gc_clear clears all references and deallocates extra
647 */
648 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000649
650 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200651 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000652}
653
654/* -------------------------------------------------------------------- */
655/* methods (in alphabetical order) */
656
657static PyObject*
658element_append(ElementObject* self, PyObject* args)
659{
660 PyObject* element;
661 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
662 return NULL;
663
664 if (element_add_subelement(self, element) < 0)
665 return NULL;
666
667 Py_RETURN_NONE;
668}
669
670static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300671element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000672{
673 if (!PyArg_ParseTuple(args, ":clear"))
674 return NULL;
675
Eli Benderskyebf37a22012-04-03 22:02:37 +0300676 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000677
678 Py_INCREF(Py_None);
679 Py_DECREF(JOIN_OBJ(self->text));
680 self->text = Py_None;
681
682 Py_INCREF(Py_None);
683 Py_DECREF(JOIN_OBJ(self->tail));
684 self->tail = Py_None;
685
686 Py_RETURN_NONE;
687}
688
689static PyObject*
690element_copy(ElementObject* self, PyObject* args)
691{
692 int i;
693 ElementObject* element;
694
695 if (!PyArg_ParseTuple(args, ":__copy__"))
696 return NULL;
697
Eli Bendersky092af1f2012-03-04 07:14:03 +0200698 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699 self->tag, (self->extra) ? self->extra->attrib : Py_None
700 );
701 if (!element)
702 return NULL;
703
704 Py_DECREF(JOIN_OBJ(element->text));
705 element->text = self->text;
706 Py_INCREF(JOIN_OBJ(element->text));
707
708 Py_DECREF(JOIN_OBJ(element->tail));
709 element->tail = self->tail;
710 Py_INCREF(JOIN_OBJ(element->tail));
711
712 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100713
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000714 if (element_resize(element, self->extra->length) < 0) {
715 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000717 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000718
719 for (i = 0; i < self->extra->length; i++) {
720 Py_INCREF(self->extra->children[i]);
721 element->extra->children[i] = self->extra->children[i];
722 }
723
724 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100725
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000726 }
727
728 return (PyObject*) element;
729}
730
731static PyObject*
732element_deepcopy(ElementObject* self, PyObject* args)
733{
734 int i;
735 ElementObject* element;
736 PyObject* tag;
737 PyObject* attrib;
738 PyObject* text;
739 PyObject* tail;
740 PyObject* id;
741
742 PyObject* memo;
743 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
744 return NULL;
745
746 tag = deepcopy(self->tag, memo);
747 if (!tag)
748 return NULL;
749
750 if (self->extra) {
751 attrib = deepcopy(self->extra->attrib, memo);
752 if (!attrib) {
753 Py_DECREF(tag);
754 return NULL;
755 }
756 } else {
757 Py_INCREF(Py_None);
758 attrib = Py_None;
759 }
760
Eli Bendersky092af1f2012-03-04 07:14:03 +0200761 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762
763 Py_DECREF(tag);
764 Py_DECREF(attrib);
765
766 if (!element)
767 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100768
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000769 text = deepcopy(JOIN_OBJ(self->text), memo);
770 if (!text)
771 goto error;
772 Py_DECREF(element->text);
773 element->text = JOIN_SET(text, JOIN_GET(self->text));
774
775 tail = deepcopy(JOIN_OBJ(self->tail), memo);
776 if (!tail)
777 goto error;
778 Py_DECREF(element->tail);
779 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
780
781 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100782
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000783 if (element_resize(element, self->extra->length) < 0)
784 goto error;
785
786 for (i = 0; i < self->extra->length; i++) {
787 PyObject* child = deepcopy(self->extra->children[i], memo);
788 if (!child) {
789 element->extra->length = i;
790 goto error;
791 }
792 element->extra->children[i] = child;
793 }
794
795 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100796
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000797 }
798
799 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200800 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000801 if (!id)
802 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000803
804 i = PyDict_SetItem(memo, id, (PyObject*) element);
805
806 Py_DECREF(id);
807
808 if (i < 0)
809 goto error;
810
811 return (PyObject*) element;
812
813 error:
814 Py_DECREF(element);
815 return NULL;
816}
817
Martin v. Löwisbce16662012-06-17 10:41:22 +0200818static PyObject*
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200819element_sizeof(PyObject* myself, PyObject* args)
Martin v. Löwisbce16662012-06-17 10:41:22 +0200820{
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200821 ElementObject *self = (ElementObject*)myself;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200822 Py_ssize_t result = sizeof(ElementObject);
823 if (self->extra) {
824 result += sizeof(ElementObjectExtra);
825 if (self->extra->children != self->extra->_children)
826 result += sizeof(PyObject*) * self->extra->allocated;
827 }
828 return PyLong_FromSsize_t(result);
829}
830
/* Keys of the state dictionary exchanged by getstate/setstate. */
#define PICKLED_ATTRIB "attrib"
#define PICKLED_CHILDREN "_children"
#define PICKLED_TAG "tag"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
837
838/* __getstate__ returns a fabricated instance dict as in the pure-Python
839 * Element implementation, for interoperability/interchangeability. This
840 * makes the pure-Python implementation details an API, but (a) there aren't
841 * any unnecessary structures there; and (b) it buys compatibility with 3.2
842 * pickles. See issue #16076.
843 */
844static PyObject *
845element_getstate(ElementObject *self)
846{
847 int i, noattrib;
848 PyObject *instancedict = NULL, *children;
849
850 /* Build a list of children. */
851 children = PyList_New(self->extra ? self->extra->length : 0);
852 if (!children)
853 return NULL;
854 for (i = 0; i < PyList_GET_SIZE(children); i++) {
855 PyObject *child = self->extra->children[i];
856 Py_INCREF(child);
857 PyList_SET_ITEM(children, i, child);
858 }
859
860 /* Construct the state object. */
861 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
862 if (noattrib)
863 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
864 PICKLED_TAG, self->tag,
865 PICKLED_CHILDREN, children,
866 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700867 PICKLED_TEXT, JOIN_OBJ(self->text),
868 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800869 else
870 instancedict = Py_BuildValue("{sOsOsOsOsO}",
871 PICKLED_TAG, self->tag,
872 PICKLED_CHILDREN, children,
873 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700874 PICKLED_TEXT, JOIN_OBJ(self->text),
875 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800876 if (instancedict) {
877 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800878 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800879 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800880 else {
881 for (i = 0; i < PyList_GET_SIZE(children); i++)
882 Py_DECREF(PyList_GET_ITEM(children, i));
883 Py_DECREF(children);
884
885 return NULL;
886 }
887}
888
889static PyObject *
890element_setstate_from_attributes(ElementObject *self,
891 PyObject *tag,
892 PyObject *attrib,
893 PyObject *text,
894 PyObject *tail,
895 PyObject *children)
896{
897 Py_ssize_t i, nchildren;
898
899 if (!tag) {
900 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
901 return NULL;
902 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800903
904 Py_CLEAR(self->tag);
905 self->tag = tag;
906 Py_INCREF(self->tag);
907
Eli Benderskydd3661e2013-09-13 06:24:25 -0700908 _clear_joined_ptr(&self->text);
909 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
910 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800911
Eli Benderskydd3661e2013-09-13 06:24:25 -0700912 _clear_joined_ptr(&self->tail);
913 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
914 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800915
916 /* Handle ATTRIB and CHILDREN. */
917 if (!children && !attrib)
918 Py_RETURN_NONE;
919
920 /* Compute 'nchildren'. */
921 if (children) {
922 if (!PyList_Check(children)) {
923 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
924 return NULL;
925 }
926 nchildren = PyList_Size(children);
927 }
928 else {
929 nchildren = 0;
930 }
931
932 /* Allocate 'extra'. */
933 if (element_resize(self, nchildren)) {
934 return NULL;
935 }
936 assert(self->extra && self->extra->allocated >= nchildren);
937
938 /* Copy children */
939 for (i = 0; i < nchildren; i++) {
940 self->extra->children[i] = PyList_GET_ITEM(children, i);
941 Py_INCREF(self->extra->children[i]);
942 }
943
944 self->extra->length = nchildren;
945 self->extra->allocated = nchildren;
946
947 /* Stash attrib. */
948 if (attrib) {
949 Py_CLEAR(self->extra->attrib);
950 self->extra->attrib = attrib;
951 Py_INCREF(attrib);
952 }
953
954 Py_RETURN_NONE;
955}
956
957/* __setstate__ for Element instance from the Python implementation.
958 * 'state' should be the instance dict.
959 */
960static PyObject *
961element_setstate_from_Python(ElementObject *self, PyObject *state)
962{
963 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
964 PICKLED_TAIL, PICKLED_CHILDREN, 0};
965 PyObject *args;
966 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800967 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800968
Eli Bendersky698bdb22013-01-10 06:01:06 -0800969 tag = attrib = text = tail = children = NULL;
970 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800971 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800972 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800973
974 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
975 &attrib, &text, &tail, &children))
976 retval = element_setstate_from_attributes(self, tag, attrib, text,
977 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800978 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800979 retval = NULL;
980
981 Py_DECREF(args);
982 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800983}
984
985static PyObject *
986element_setstate(ElementObject *self, PyObject *state)
987{
988 if (!PyDict_CheckExact(state)) {
989 PyErr_Format(PyExc_TypeError,
990 "Don't know how to unpickle \"%.200R\" as an Element",
991 state);
992 return NULL;
993 }
994 else
995 return element_setstate_from_Python(self, state);
996}
997
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000998LOCAL(int)
999checkpath(PyObject* tag)
1000{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001001 Py_ssize_t i;
1002 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001003
1004 /* check if a tag contains an xpath character */
1005
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001006#define PATHCHAR(ch) \
1007 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001008
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001009 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001010 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1011 void *data = PyUnicode_DATA(tag);
1012 unsigned int kind = PyUnicode_KIND(tag);
1013 for (i = 0; i < len; i++) {
1014 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1015 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001016 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001017 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001018 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001019 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001020 return 1;
1021 }
1022 return 0;
1023 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001024 if (PyBytes_Check(tag)) {
1025 char *p = PyBytes_AS_STRING(tag);
1026 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001027 if (p[i] == '{')
1028 check = 0;
1029 else if (p[i] == '}')
1030 check = 1;
1031 else if (check && PATHCHAR(p[i]))
1032 return 1;
1033 }
1034 return 0;
1035 }
1036
1037 return 1; /* unknown type; might be path expression */
1038}
1039
1040static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001041element_extend(ElementObject* self, PyObject* args)
1042{
1043 PyObject* seq;
1044 Py_ssize_t i, seqlen = 0;
1045
1046 PyObject* seq_in;
1047 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1048 return NULL;
1049
1050 seq = PySequence_Fast(seq_in, "");
1051 if (!seq) {
1052 PyErr_Format(
1053 PyExc_TypeError,
1054 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1055 );
1056 return NULL;
1057 }
1058
1059 seqlen = PySequence_Size(seq);
1060 for (i = 0; i < seqlen; i++) {
1061 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001062 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1063 Py_DECREF(seq);
1064 PyErr_Format(
1065 PyExc_TypeError,
1066 "expected an Element, not \"%.200s\"",
1067 Py_TYPE(element)->tp_name);
1068 return NULL;
1069 }
1070
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001071 if (element_add_subelement(self, element) < 0) {
1072 Py_DECREF(seq);
1073 return NULL;
1074 }
1075 }
1076
1077 Py_DECREF(seq);
1078
1079 Py_RETURN_NONE;
1080}
1081
1082static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001083element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001084{
1085 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001087 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001088 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001089 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001090
Eli Bendersky737b1732012-05-29 06:02:56 +03001091 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1092 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001093 return NULL;
1094
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001095 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001096 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001097 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001098 st->elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001099 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001100 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001101
1102 if (!self->extra)
1103 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001104
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001105 for (i = 0; i < self->extra->length; i++) {
1106 PyObject* item = self->extra->children[i];
1107 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001108 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001109 Py_INCREF(item);
1110 return item;
1111 }
1112 }
1113
1114 Py_RETURN_NONE;
1115}
1116
1117static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001118element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001119{
1120 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001121 PyObject* tag;
1122 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001123 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001124 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001125 static char *kwlist[] = {"path", "default", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001126 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001127
Eli Bendersky737b1732012-05-29 06:02:56 +03001128 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1129 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001130 return NULL;
1131
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001132 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001133 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001134 st->elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001135 );
1136
1137 if (!self->extra) {
1138 Py_INCREF(default_value);
1139 return default_value;
1140 }
1141
1142 for (i = 0; i < self->extra->length; i++) {
1143 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +00001144 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
1145
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001146 PyObject* text = element_get_text(item);
1147 if (text == Py_None)
Eli Bendersky25771b32013-01-13 05:26:07 -08001148 return PyUnicode_New(0, 0);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001149 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001150 return text;
1151 }
1152 }
1153
1154 Py_INCREF(default_value);
1155 return default_value;
1156}
1157
1158static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001159element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001160{
1161 int i;
1162 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001163 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001164 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001165 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001166 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001167
Eli Bendersky737b1732012-05-29 06:02:56 +03001168 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1169 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001170 return NULL;
1171
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001172 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001173 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001174 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001175 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001176 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001177 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178
1179 out = PyList_New(0);
1180 if (!out)
1181 return NULL;
1182
1183 if (!self->extra)
1184 return out;
1185
1186 for (i = 0; i < self->extra->length; i++) {
1187 PyObject* item = self->extra->children[i];
1188 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001189 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001190 if (PyList_Append(out, item) < 0) {
1191 Py_DECREF(out);
1192 return NULL;
1193 }
1194 }
1195 }
1196
1197 return out;
1198}
1199
1200static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001201element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001202{
1203 PyObject* tag;
1204 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001205 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001206 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001207 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001208
Eli Bendersky737b1732012-05-29 06:02:56 +03001209 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1210 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001211 return NULL;
1212
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001213 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001214 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001215 );
1216}
1217
1218static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001219element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001220{
1221 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001222 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001223
1224 PyObject* key;
1225 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001226
1227 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1228 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001229 return NULL;
1230
1231 if (!self->extra || self->extra->attrib == Py_None)
1232 value = default_value;
1233 else {
1234 value = PyDict_GetItem(self->extra->attrib, key);
1235 if (!value)
1236 value = default_value;
1237 }
1238
1239 Py_INCREF(value);
1240 return value;
1241}
1242
1243static PyObject*
1244element_getchildren(ElementObject* self, PyObject* args)
1245{
1246 int i;
1247 PyObject* list;
1248
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001249 /* FIXME: report as deprecated? */
1250
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001251 if (!PyArg_ParseTuple(args, ":getchildren"))
1252 return NULL;
1253
1254 if (!self->extra)
1255 return PyList_New(0);
1256
1257 list = PyList_New(self->extra->length);
1258 if (!list)
1259 return NULL;
1260
1261 for (i = 0; i < self->extra->length; i++) {
1262 PyObject* item = self->extra->children[i];
1263 Py_INCREF(item);
1264 PyList_SET_ITEM(list, i, item);
1265 }
1266
1267 return list;
1268}
1269
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001270
Eli Bendersky64d11e62012-06-15 07:42:50 +03001271static PyObject *
1272create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1273
1274
1275static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001276element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001277{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001278 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001279 static char* kwlist[] = {"tag", 0};
1280
1281 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282 return NULL;
1283
Eli Bendersky64d11e62012-06-15 07:42:50 +03001284 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001285}
1286
1287
1288static PyObject*
1289element_itertext(ElementObject* self, PyObject* args)
1290{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001291 if (!PyArg_ParseTuple(args, ":itertext"))
1292 return NULL;
1293
Eli Bendersky64d11e62012-06-15 07:42:50 +03001294 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001295}
1296
Eli Bendersky64d11e62012-06-15 07:42:50 +03001297
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001298static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001299element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001300{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001301 ElementObject* self = (ElementObject*) self_;
1302
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001303 if (!self->extra || index < 0 || index >= self->extra->length) {
1304 PyErr_SetString(
1305 PyExc_IndexError,
1306 "child index out of range"
1307 );
1308 return NULL;
1309 }
1310
1311 Py_INCREF(self->extra->children[index]);
1312 return self->extra->children[index];
1313}
1314
1315static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001316element_insert(ElementObject* self, PyObject* args)
1317{
1318 int i;
1319
1320 int index;
1321 PyObject* element;
1322 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1323 &Element_Type, &element))
1324 return NULL;
1325
Victor Stinner5f0af232013-07-11 23:01:36 +02001326 if (!self->extra) {
1327 if (create_extra(self, NULL) < 0)
1328 return NULL;
1329 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001330
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001331 if (index < 0) {
1332 index += self->extra->length;
1333 if (index < 0)
1334 index = 0;
1335 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001336 if (index > self->extra->length)
1337 index = self->extra->length;
1338
1339 if (element_resize(self, 1) < 0)
1340 return NULL;
1341
1342 for (i = self->extra->length; i > index; i--)
1343 self->extra->children[i] = self->extra->children[i-1];
1344
1345 Py_INCREF(element);
1346 self->extra->children[index] = element;
1347
1348 self->extra->length++;
1349
1350 Py_RETURN_NONE;
1351}
1352
1353static PyObject*
1354element_items(ElementObject* self, PyObject* args)
1355{
1356 if (!PyArg_ParseTuple(args, ":items"))
1357 return NULL;
1358
1359 if (!self->extra || self->extra->attrib == Py_None)
1360 return PyList_New(0);
1361
1362 return PyDict_Items(self->extra->attrib);
1363}
1364
1365static PyObject*
1366element_keys(ElementObject* self, PyObject* args)
1367{
1368 if (!PyArg_ParseTuple(args, ":keys"))
1369 return NULL;
1370
1371 if (!self->extra || self->extra->attrib == Py_None)
1372 return PyList_New(0);
1373
1374 return PyDict_Keys(self->extra->attrib);
1375}
1376
Martin v. Löwis18e16552006-02-15 17:27:45 +00001377static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001378element_length(ElementObject* self)
1379{
1380 if (!self->extra)
1381 return 0;
1382
1383 return self->extra->length;
1384}
1385
1386static PyObject*
1387element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1388{
1389 PyObject* elem;
1390
1391 PyObject* tag;
1392 PyObject* attrib;
1393 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1394 return NULL;
1395
1396 attrib = PyDict_Copy(attrib);
1397 if (!attrib)
1398 return NULL;
1399
Eli Bendersky092af1f2012-03-04 07:14:03 +02001400 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001401
1402 Py_DECREF(attrib);
1403
1404 return elem;
1405}
1406
1407static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001408element_remove(ElementObject* self, PyObject* args)
1409{
1410 int i;
1411
1412 PyObject* element;
1413 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1414 return NULL;
1415
1416 if (!self->extra) {
1417 /* element has no children, so raise exception */
1418 PyErr_SetString(
1419 PyExc_ValueError,
1420 "list.remove(x): x not in list"
1421 );
1422 return NULL;
1423 }
1424
1425 for (i = 0; i < self->extra->length; i++) {
1426 if (self->extra->children[i] == element)
1427 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001428 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001429 break;
1430 }
1431
1432 if (i == self->extra->length) {
1433 /* element is not in children, so raise exception */
1434 PyErr_SetString(
1435 PyExc_ValueError,
1436 "list.remove(x): x not in list"
1437 );
1438 return NULL;
1439 }
1440
1441 Py_DECREF(self->extra->children[i]);
1442
1443 self->extra->length--;
1444
1445 for (; i < self->extra->length; i++)
1446 self->extra->children[i] = self->extra->children[i+1];
1447
1448 Py_RETURN_NONE;
1449}
1450
1451static PyObject*
1452element_repr(ElementObject* self)
1453{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001454 if (self->tag)
1455 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1456 else
1457 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001458}
1459
1460static PyObject*
1461element_set(ElementObject* self, PyObject* args)
1462{
1463 PyObject* attrib;
1464
1465 PyObject* key;
1466 PyObject* value;
1467 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1468 return NULL;
1469
Victor Stinner5f0af232013-07-11 23:01:36 +02001470 if (!self->extra) {
1471 if (create_extra(self, NULL) < 0)
1472 return NULL;
1473 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001474
1475 attrib = element_get_attrib(self);
1476 if (!attrib)
1477 return NULL;
1478
1479 if (PyDict_SetItem(attrib, key, value) < 0)
1480 return NULL;
1481
1482 Py_RETURN_NONE;
1483}
1484
1485static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001486element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001487{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001488 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001489 int i;
1490 PyObject* old;
1491
1492 if (!self->extra || index < 0 || index >= self->extra->length) {
1493 PyErr_SetString(
1494 PyExc_IndexError,
1495 "child assignment index out of range");
1496 return -1;
1497 }
1498
1499 old = self->extra->children[index];
1500
1501 if (item) {
1502 Py_INCREF(item);
1503 self->extra->children[index] = item;
1504 } else {
1505 self->extra->length--;
1506 for (i = index; i < self->extra->length; i++)
1507 self->extra->children[i] = self->extra->children[i+1];
1508 }
1509
1510 Py_DECREF(old);
1511
1512 return 0;
1513}
1514
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001515static PyObject*
1516element_subscr(PyObject* self_, PyObject* item)
1517{
1518 ElementObject* self = (ElementObject*) self_;
1519
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001520 if (PyIndex_Check(item)) {
1521 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001522
1523 if (i == -1 && PyErr_Occurred()) {
1524 return NULL;
1525 }
1526 if (i < 0 && self->extra)
1527 i += self->extra->length;
1528 return element_getitem(self_, i);
1529 }
1530 else if (PySlice_Check(item)) {
1531 Py_ssize_t start, stop, step, slicelen, cur, i;
1532 PyObject* list;
1533
1534 if (!self->extra)
1535 return PyList_New(0);
1536
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001537 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001538 self->extra->length,
1539 &start, &stop, &step, &slicelen) < 0) {
1540 return NULL;
1541 }
1542
1543 if (slicelen <= 0)
1544 return PyList_New(0);
1545 else {
1546 list = PyList_New(slicelen);
1547 if (!list)
1548 return NULL;
1549
1550 for (cur = start, i = 0; i < slicelen;
1551 cur += step, i++) {
1552 PyObject* item = self->extra->children[cur];
1553 Py_INCREF(item);
1554 PyList_SET_ITEM(list, i, item);
1555 }
1556
1557 return list;
1558 }
1559 }
1560 else {
1561 PyErr_SetString(PyExc_TypeError,
1562 "element indices must be integers");
1563 return NULL;
1564 }
1565}
1566
1567static int
1568element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1569{
1570 ElementObject* self = (ElementObject*) self_;
1571
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001572 if (PyIndex_Check(item)) {
1573 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001574
1575 if (i == -1 && PyErr_Occurred()) {
1576 return -1;
1577 }
1578 if (i < 0 && self->extra)
1579 i += self->extra->length;
1580 return element_setitem(self_, i, value);
1581 }
1582 else if (PySlice_Check(item)) {
1583 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1584
1585 PyObject* recycle = NULL;
1586 PyObject* seq = NULL;
1587
Victor Stinner5f0af232013-07-11 23:01:36 +02001588 if (!self->extra) {
1589 if (create_extra(self, NULL) < 0)
1590 return -1;
1591 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001592
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001593 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001594 self->extra->length,
1595 &start, &stop, &step, &slicelen) < 0) {
1596 return -1;
1597 }
1598
Eli Bendersky865756a2012-03-09 13:38:15 +02001599 if (value == NULL) {
1600 /* Delete slice */
1601 size_t cur;
1602 Py_ssize_t i;
1603
1604 if (slicelen <= 0)
1605 return 0;
1606
1607 /* Since we're deleting, the direction of the range doesn't matter,
1608 * so for simplicity make it always ascending.
1609 */
1610 if (step < 0) {
1611 stop = start + 1;
1612 start = stop + step * (slicelen - 1) - 1;
1613 step = -step;
1614 }
1615
1616 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1617
1618 /* recycle is a list that will contain all the children
1619 * scheduled for removal.
1620 */
1621 if (!(recycle = PyList_New(slicelen))) {
1622 PyErr_NoMemory();
1623 return -1;
1624 }
1625
1626 /* This loop walks over all the children that have to be deleted,
1627 * with cur pointing at them. num_moved is the amount of children
1628 * until the next deleted child that have to be "shifted down" to
1629 * occupy the deleted's places.
1630 * Note that in the ith iteration, shifting is done i+i places down
1631 * because i children were already removed.
1632 */
1633 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1634 /* Compute how many children have to be moved, clipping at the
1635 * list end.
1636 */
1637 Py_ssize_t num_moved = step - 1;
1638 if (cur + step >= (size_t)self->extra->length) {
1639 num_moved = self->extra->length - cur - 1;
1640 }
1641
1642 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1643
1644 memmove(
1645 self->extra->children + cur - i,
1646 self->extra->children + cur + 1,
1647 num_moved * sizeof(PyObject *));
1648 }
1649
1650 /* Leftover "tail" after the last removed child */
1651 cur = start + (size_t)slicelen * step;
1652 if (cur < (size_t)self->extra->length) {
1653 memmove(
1654 self->extra->children + cur - slicelen,
1655 self->extra->children + cur,
1656 (self->extra->length - cur) * sizeof(PyObject *));
1657 }
1658
1659 self->extra->length -= slicelen;
1660
1661 /* Discard the recycle list with all the deleted sub-elements */
1662 Py_XDECREF(recycle);
1663 return 0;
1664 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001665 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001666 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001667 seq = PySequence_Fast(value, "");
1668 if (!seq) {
1669 PyErr_Format(
1670 PyExc_TypeError,
1671 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1672 );
1673 return -1;
1674 }
1675 newlen = PySequence_Size(seq);
1676 }
1677
1678 if (step != 1 && newlen != slicelen)
1679 {
1680 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001681 "attempt to assign sequence of size %zd "
1682 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001683 newlen, slicelen
1684 );
1685 return -1;
1686 }
1687
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001688 /* Resize before creating the recycle bin, to prevent refleaks. */
1689 if (newlen > slicelen) {
1690 if (element_resize(self, newlen - slicelen) < 0) {
1691 if (seq) {
1692 Py_DECREF(seq);
1693 }
1694 return -1;
1695 }
1696 }
1697
1698 if (slicelen > 0) {
1699 /* to avoid recursive calls to this method (via decref), move
1700 old items to the recycle bin here, and get rid of them when
1701 we're done modifying the element */
1702 recycle = PyList_New(slicelen);
1703 if (!recycle) {
1704 if (seq) {
1705 Py_DECREF(seq);
1706 }
1707 return -1;
1708 }
1709 for (cur = start, i = 0; i < slicelen;
1710 cur += step, i++)
1711 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1712 }
1713
1714 if (newlen < slicelen) {
1715 /* delete slice */
1716 for (i = stop; i < self->extra->length; i++)
1717 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1718 } else if (newlen > slicelen) {
1719 /* insert slice */
1720 for (i = self->extra->length-1; i >= stop; i--)
1721 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1722 }
1723
1724 /* replace the slice */
1725 for (cur = start, i = 0; i < newlen;
1726 cur += step, i++) {
1727 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1728 Py_INCREF(element);
1729 self->extra->children[cur] = element;
1730 }
1731
1732 self->extra->length += newlen - slicelen;
1733
1734 if (seq) {
1735 Py_DECREF(seq);
1736 }
1737
1738 /* discard the recycle bin, and everything in it */
1739 Py_XDECREF(recycle);
1740
1741 return 0;
1742 }
1743 else {
1744 PyErr_SetString(PyExc_TypeError,
1745 "element indices must be integers");
1746 return -1;
1747 }
1748}
1749
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001750static PyMethodDef element_methods[] = {
1751
Eli Bendersky0192ba32012-03-30 16:38:33 +03001752 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001753
Eli Benderskya8736902013-01-05 06:26:39 -08001754 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001755 {"set", (PyCFunction) element_set, METH_VARARGS},
1756
Eli Bendersky737b1732012-05-29 06:02:56 +03001757 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1758 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1759 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001760
1761 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001762 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001763 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1764 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1765
Eli Benderskya8736902013-01-05 06:26:39 -08001766 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001767 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001768 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001769
Eli Benderskya8736902013-01-05 06:26:39 -08001770 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001771 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1772
1773 {"items", (PyCFunction) element_items, METH_VARARGS},
1774 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1775
1776 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1777
1778 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1779 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001780 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001781 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1782 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001783
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001784 {NULL, NULL}
1785};
1786
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001787static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001788element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001789{
1790 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001791 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001792
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001793 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001794 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001795
Alexander Belopolskye239d232010-12-08 23:31:48 +00001796 if (name == NULL)
1797 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001798
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001799 /* handle common attributes first */
1800 if (strcmp(name, "tag") == 0) {
1801 res = self->tag;
1802 Py_INCREF(res);
1803 return res;
1804 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001805 res = element_get_text(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02001806 Py_XINCREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001807 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001808 }
1809
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001810 /* methods */
1811 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1812 if (res)
1813 return res;
1814
1815 /* less common attributes */
1816 if (strcmp(name, "tail") == 0) {
1817 PyErr_Clear();
1818 res = element_get_tail(self);
1819 } else if (strcmp(name, "attrib") == 0) {
1820 PyErr_Clear();
Victor Stinner5f0af232013-07-11 23:01:36 +02001821 if (!self->extra) {
1822 if (create_extra(self, NULL) < 0)
1823 return NULL;
1824 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001825 res = element_get_attrib(self);
1826 }
1827
1828 if (!res)
1829 return NULL;
1830
1831 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001832 return res;
1833}
1834
Eli Benderskyef9683b2013-05-18 07:52:34 -07001835static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001836element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001837{
Eli Benderskyb20df952012-05-20 06:33:29 +03001838 char *name = "";
1839 if (PyUnicode_Check(nameobj))
1840 name = _PyUnicode_AsString(nameobj);
Victor Stinner4d463432013-07-11 23:05:03 +02001841 if (name == NULL)
Eli Benderskyef9683b2013-05-18 07:52:34 -07001842 return -1;
Victor Stinner4d463432013-07-11 23:05:03 +02001843
1844 if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001845 Py_DECREF(self->tag);
1846 self->tag = value;
1847 Py_INCREF(self->tag);
1848 } else if (strcmp(name, "text") == 0) {
1849 Py_DECREF(JOIN_OBJ(self->text));
1850 self->text = value;
1851 Py_INCREF(self->text);
1852 } else if (strcmp(name, "tail") == 0) {
1853 Py_DECREF(JOIN_OBJ(self->tail));
1854 self->tail = value;
1855 Py_INCREF(self->tail);
1856 } else if (strcmp(name, "attrib") == 0) {
Victor Stinner5f0af232013-07-11 23:01:36 +02001857 if (!self->extra) {
1858 if (create_extra(self, NULL) < 0)
1859 return -1;
1860 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001861 Py_DECREF(self->extra->attrib);
1862 self->extra->attrib = value;
1863 Py_INCREF(self->extra->attrib);
1864 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001865 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001866 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001867 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001868 }
1869
Eli Benderskyef9683b2013-05-18 07:52:34 -07001870 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001871}
1872
1873static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001874 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001875 0, /* sq_concat */
1876 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001877 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001878 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001879 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001880 0,
1881};
1882
1883static PyMappingMethods element_as_mapping = {
1884 (lenfunc) element_length,
1885 (binaryfunc) element_subscr,
1886 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001887};
1888
Neal Norwitz227b5332006-03-22 09:28:35 +00001889static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001890 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001891 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001892 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001893 (destructor)element_dealloc, /* tp_dealloc */
1894 0, /* tp_print */
1895 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001896 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001897 0, /* tp_reserved */
1898 (reprfunc)element_repr, /* tp_repr */
1899 0, /* tp_as_number */
1900 &element_as_sequence, /* tp_as_sequence */
1901 &element_as_mapping, /* tp_as_mapping */
1902 0, /* tp_hash */
1903 0, /* tp_call */
1904 0, /* tp_str */
1905 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001906 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001907 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001908 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1909 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001910 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001911 (traverseproc)element_gc_traverse, /* tp_traverse */
1912 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001913 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001914 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001915 0, /* tp_iter */
1916 0, /* tp_iternext */
1917 element_methods, /* tp_methods */
1918 0, /* tp_members */
1919 0, /* tp_getset */
1920 0, /* tp_base */
1921 0, /* tp_dict */
1922 0, /* tp_descr_get */
1923 0, /* tp_descr_set */
1924 0, /* tp_dictoffset */
1925 (initproc)element_init, /* tp_init */
1926 PyType_GenericAlloc, /* tp_alloc */
1927 element_new, /* tp_new */
1928 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001929};
1930
Eli Bendersky64d11e62012-06-15 07:42:50 +03001931/******************************* Element iterator ****************************/
1932
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a singly linked list starting at parent_stack.
 * Each stack node contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
1941typedef struct ParentLocator_t {
1942 ElementObject *parent;
1943 Py_ssize_t child_index;
1944 struct ParentLocator_t *next;
1945} ParentLocator;
1946
1947typedef struct {
1948 PyObject_HEAD
1949 ParentLocator *parent_stack;
1950 ElementObject *root_element;
1951 PyObject *sought_tag;
1952 int root_done;
1953 int gettext;
1954} ElementIterObject;
1955
1956
1957static void
1958elementiter_dealloc(ElementIterObject *it)
1959{
1960 ParentLocator *p = it->parent_stack;
1961 while (p) {
1962 ParentLocator *temp = p;
1963 Py_XDECREF(p->parent);
1964 p = p->next;
1965 PyObject_Free(temp);
1966 }
1967
1968 Py_XDECREF(it->sought_tag);
1969 Py_XDECREF(it->root_element);
1970
1971 PyObject_GC_UnTrack(it);
1972 PyObject_GC_Del(it);
1973}
1974
1975static int
1976elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1977{
1978 ParentLocator *p = it->parent_stack;
1979 while (p) {
1980 Py_VISIT(p->parent);
1981 p = p->next;
1982 }
1983
1984 Py_VISIT(it->root_element);
1985 Py_VISIT(it->sought_tag);
1986 return 0;
1987}
1988
1989/* Helper function for elementiter_next. Add a new parent to the parent stack.
1990 */
1991static ParentLocator *
1992parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1993{
1994 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1995 if (new_node) {
1996 new_node->parent = parent;
1997 Py_INCREF(parent);
1998 new_node->child_index = 0;
1999 new_node->next = stack;
2000 }
2001 return new_node;
2002}
2003
2004static PyObject *
2005elementiter_next(ElementIterObject *it)
2006{
2007 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002008 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002009 * A short note on gettext: this function serves both the iter() and
2010 * itertext() methods to avoid code duplication. However, there are a few
2011 * small differences in the way these iterations work. Namely:
2012 * - itertext() only yields text from nodes that have it, and continues
2013 * iterating when a node doesn't have text (so it doesn't return any
2014 * node like iter())
2015 * - itertext() also has to handle tail, after finishing with all the
2016 * children of a node.
2017 */
Eli Bendersky113da642012-06-15 07:52:49 +03002018 ElementObject *cur_parent;
2019 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002020
2021 while (1) {
2022 /* Handle the case reached in the beginning and end of iteration, where
2023 * the parent stack is empty. The root_done flag gives us indication
2024 * whether we've just started iterating (so root_done is 0), in which
2025 * case the root is returned. If root_done is 1 and we're here, the
2026 * iterator is exhausted.
2027 */
2028 if (!it->parent_stack->parent) {
2029 if (it->root_done) {
2030 PyErr_SetNone(PyExc_StopIteration);
2031 return NULL;
2032 } else {
2033 it->parent_stack = parent_stack_push_new(it->parent_stack,
2034 it->root_element);
2035 if (!it->parent_stack) {
2036 PyErr_NoMemory();
2037 return NULL;
2038 }
2039
2040 it->root_done = 1;
2041 if (it->sought_tag == Py_None ||
2042 PyObject_RichCompareBool(it->root_element->tag,
2043 it->sought_tag, Py_EQ) == 1) {
2044 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002045 PyObject *text = element_get_text(it->root_element);
2046 if (!text)
2047 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002048 if (PyObject_IsTrue(text)) {
2049 Py_INCREF(text);
2050 return text;
2051 }
2052 } else {
2053 Py_INCREF(it->root_element);
2054 return (PyObject *)it->root_element;
2055 }
2056 }
2057 }
2058 }
2059
2060 /* See if there are children left to traverse in the current parent. If
2061 * yes, visit the next child. If not, pop the stack and try again.
2062 */
Eli Bendersky113da642012-06-15 07:52:49 +03002063 cur_parent = it->parent_stack->parent;
2064 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002065 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2066 ElementObject *child = (ElementObject *)
2067 cur_parent->extra->children[child_index];
2068 it->parent_stack->child_index++;
2069 it->parent_stack = parent_stack_push_new(it->parent_stack,
2070 child);
2071 if (!it->parent_stack) {
2072 PyErr_NoMemory();
2073 return NULL;
2074 }
2075
2076 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002077 PyObject *text = element_get_text(child);
2078 if (!text)
2079 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002080 if (PyObject_IsTrue(text)) {
2081 Py_INCREF(text);
2082 return text;
2083 }
2084 } else if (it->sought_tag == Py_None ||
2085 PyObject_RichCompareBool(child->tag,
2086 it->sought_tag, Py_EQ) == 1) {
2087 Py_INCREF(child);
2088 return (PyObject *)child;
2089 }
2090 else
2091 continue;
2092 }
2093 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002094 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002095 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002096 if (it->gettext) {
2097 tail = element_get_tail(cur_parent);
2098 if (!tail)
2099 return NULL;
2100 }
2101 else
2102 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002103 Py_XDECREF(it->parent_stack->parent);
2104 PyObject_Free(it->parent_stack);
2105 it->parent_stack = next;
2106
2107 /* Note that extra condition on it->parent_stack->parent here;
2108 * this is because itertext() is supposed to only return *inner*
2109 * text, not text following the element it began iteration with.
2110 */
2111 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2112 Py_INCREF(tail);
2113 return tail;
2114 }
2115 }
2116 }
2117
2118 return NULL;
2119}
2120
2121
2122static PyTypeObject ElementIter_Type = {
2123 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002124 /* Using the module's name since the pure-Python implementation does not
2125 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002126 "_elementtree._element_iterator", /* tp_name */
2127 sizeof(ElementIterObject), /* tp_basicsize */
2128 0, /* tp_itemsize */
2129 /* methods */
2130 (destructor)elementiter_dealloc, /* tp_dealloc */
2131 0, /* tp_print */
2132 0, /* tp_getattr */
2133 0, /* tp_setattr */
2134 0, /* tp_reserved */
2135 0, /* tp_repr */
2136 0, /* tp_as_number */
2137 0, /* tp_as_sequence */
2138 0, /* tp_as_mapping */
2139 0, /* tp_hash */
2140 0, /* tp_call */
2141 0, /* tp_str */
2142 0, /* tp_getattro */
2143 0, /* tp_setattro */
2144 0, /* tp_as_buffer */
2145 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2146 0, /* tp_doc */
2147 (traverseproc)elementiter_traverse, /* tp_traverse */
2148 0, /* tp_clear */
2149 0, /* tp_richcompare */
2150 0, /* tp_weaklistoffset */
2151 PyObject_SelfIter, /* tp_iter */
2152 (iternextfunc)elementiter_next, /* tp_iternext */
2153 0, /* tp_methods */
2154 0, /* tp_members */
2155 0, /* tp_getset */
2156 0, /* tp_base */
2157 0, /* tp_dict */
2158 0, /* tp_descr_get */
2159 0, /* tp_descr_set */
2160 0, /* tp_dictoffset */
2161 0, /* tp_init */
2162 0, /* tp_alloc */
2163 0, /* tp_new */
2164};
2165
2166
2167static PyObject *
2168create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2169{
2170 ElementIterObject *it;
2171 PyObject *star = NULL;
2172
2173 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2174 if (!it)
2175 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002176
2177 if (PyUnicode_Check(tag))
2178 star = PyUnicode_FromString("*");
2179 else if (PyBytes_Check(tag))
2180 star = PyBytes_FromString("*");
2181
2182 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2183 tag = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002184 Py_XDECREF(star);
Victor Stinner4d463432013-07-11 23:05:03 +02002185
2186 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002187 it->sought_tag = tag;
2188 it->root_done = 0;
2189 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002190 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002191 it->root_element = self;
2192
Eli Bendersky64d11e62012-06-15 07:42:50 +03002193 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002194
2195 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2196 if (it->parent_stack == NULL) {
2197 Py_DECREF(it);
2198 PyErr_NoMemory();
2199 return NULL;
2200 }
2201 it->parent_stack->parent = NULL;
2202 it->parent_stack->child_index = 0;
2203 it->parent_stack->next = NULL;
2204
Eli Bendersky64d11e62012-06-15 07:42:50 +03002205 return (PyObject *)it;
2206}
2207
2208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002209/* ==================================================================== */
2210/* the tree builder type */
2211
2212typedef struct {
2213 PyObject_HEAD
2214
Eli Bendersky58d548d2012-05-29 15:45:16 +03002215 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002216
Antoine Pitrouee329312012-10-04 19:53:29 +02002217 PyObject *this; /* current node */
2218 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002219
Eli Bendersky58d548d2012-05-29 15:45:16 +03002220 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002221
Eli Bendersky58d548d2012-05-29 15:45:16 +03002222 PyObject *stack; /* element stack */
2223 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002224
Eli Bendersky48d358b2012-05-30 17:57:50 +03002225 PyObject *element_factory;
2226
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002227 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002228 PyObject *events; /* list of events, or NULL if not collecting */
2229 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2230 PyObject *end_event_obj;
2231 PyObject *start_ns_event_obj;
2232 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002233} TreeBuilderObject;
2234
Christian Heimes90aa7642007-12-19 02:45:37 +00002235#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002236
2237/* -------------------------------------------------------------------- */
2238/* constructor and destructor */
2239
Eli Bendersky58d548d2012-05-29 15:45:16 +03002240static PyObject *
2241treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002242{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002243 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2244 if (t != NULL) {
2245 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002246
Eli Bendersky58d548d2012-05-29 15:45:16 +03002247 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002248 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002249 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002250 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002251
Eli Bendersky58d548d2012-05-29 15:45:16 +03002252 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002253 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002254 t->stack = PyList_New(20);
2255 if (!t->stack) {
2256 Py_DECREF(t->this);
2257 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002258 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002259 return NULL;
2260 }
2261 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002262
Eli Bendersky58d548d2012-05-29 15:45:16 +03002263 t->events = NULL;
2264 t->start_event_obj = t->end_event_obj = NULL;
2265 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2266 }
2267 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002268}
2269
Eli Bendersky58d548d2012-05-29 15:45:16 +03002270static int
2271treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002272{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002273 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002274 PyObject *element_factory = NULL;
2275 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002276 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002277
2278 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2279 &element_factory)) {
2280 return -1;
2281 }
2282
2283 if (element_factory) {
2284 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002285 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002286 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002287 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002288 }
2289
Eli Bendersky58d548d2012-05-29 15:45:16 +03002290 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002291}
2292
Eli Bendersky48d358b2012-05-30 17:57:50 +03002293static int
2294treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2295{
2296 Py_VISIT(self->root);
2297 Py_VISIT(self->this);
2298 Py_VISIT(self->last);
2299 Py_VISIT(self->data);
2300 Py_VISIT(self->stack);
2301 Py_VISIT(self->element_factory);
2302 return 0;
2303}
2304
2305static int
2306treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002307{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002308 Py_CLEAR(self->end_ns_event_obj);
2309 Py_CLEAR(self->start_ns_event_obj);
2310 Py_CLEAR(self->end_event_obj);
2311 Py_CLEAR(self->start_event_obj);
2312 Py_CLEAR(self->events);
2313 Py_CLEAR(self->stack);
2314 Py_CLEAR(self->data);
2315 Py_CLEAR(self->last);
2316 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002317 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002318 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002319 return 0;
2320}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002321
Eli Bendersky48d358b2012-05-30 17:57:50 +03002322static void
2323treebuilder_dealloc(TreeBuilderObject *self)
2324{
2325 PyObject_GC_UnTrack(self);
2326 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002327 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002328}
2329
2330/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002331/* helpers for handling of arbitrary element-like objects */
2332
2333static int
2334treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2335 PyObject **dest, _Py_Identifier *name)
2336{
2337 if (Element_CheckExact(element)) {
2338 Py_DECREF(JOIN_OBJ(*dest));
2339 *dest = JOIN_SET(data, PyList_CheckExact(data));
2340 return 0;
2341 }
2342 else {
2343 PyObject *joined = list_join(data);
2344 int r;
2345 if (joined == NULL)
2346 return -1;
2347 r = _PyObject_SetAttrId(element, name, joined);
2348 Py_DECREF(joined);
2349 return r;
2350 }
2351}
2352
2353/* These two functions steal a reference to data */
2354static int
2355treebuilder_set_element_text(PyObject *element, PyObject *data)
2356{
2357 _Py_IDENTIFIER(text);
2358 return treebuilder_set_element_text_or_tail(
2359 element, data, &((ElementObject *) element)->text, &PyId_text);
2360}
2361
2362static int
2363treebuilder_set_element_tail(PyObject *element, PyObject *data)
2364{
2365 _Py_IDENTIFIER(tail);
2366 return treebuilder_set_element_text_or_tail(
2367 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2368}
2369
2370static int
2371treebuilder_add_subelement(PyObject *element, PyObject *child)
2372{
2373 _Py_IDENTIFIER(append);
2374 if (Element_CheckExact(element)) {
2375 ElementObject *elem = (ElementObject *) element;
2376 return element_add_subelement(elem, child);
2377 }
2378 else {
2379 PyObject *res;
2380 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2381 if (res == NULL)
2382 return -1;
2383 Py_DECREF(res);
2384 return 0;
2385 }
2386}
2387
2388/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002389/* handlers */
2390
2391LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002392treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2393 PyObject* attrib)
2394{
2395 PyObject* node;
2396 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002397 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002398
2399 if (self->data) {
2400 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002401 if (treebuilder_set_element_text(self->last, self->data))
2402 return NULL;
2403 }
2404 else {
2405 if (treebuilder_set_element_tail(self->last, self->data))
2406 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002407 }
2408 self->data = NULL;
2409 }
2410
Eli Bendersky08231a92013-05-18 15:47:16 -07002411 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002412 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2413 } else {
2414 node = create_new_element(tag, attrib);
2415 }
2416 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002417 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002418 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002419
Antoine Pitrouee329312012-10-04 19:53:29 +02002420 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002421
2422 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002423 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002424 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002425 } else {
2426 if (self->root) {
2427 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002428 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002429 "multiple elements on top level"
2430 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002431 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002432 }
2433 Py_INCREF(node);
2434 self->root = node;
2435 }
2436
2437 if (self->index < PyList_GET_SIZE(self->stack)) {
2438 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002439 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002440 Py_INCREF(this);
2441 } else {
2442 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002443 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002444 }
2445 self->index++;
2446
2447 Py_DECREF(this);
2448 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002449 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002450
2451 Py_DECREF(self->last);
2452 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002453 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002454
2455 if (self->start_event_obj) {
2456 PyObject* res;
2457 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002458 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002459 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002460 PyList_Append(self->events, res);
2461 Py_DECREF(res);
2462 } else
2463 PyErr_Clear(); /* FIXME: propagate error */
2464 }
2465
2466 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002467
2468 error:
2469 Py_DECREF(node);
2470 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002471}
2472
2473LOCAL(PyObject*)
2474treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2475{
2476 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002477 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002478 /* ignore calls to data before the first call to start */
2479 Py_RETURN_NONE;
2480 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002481 /* store the first item as is */
2482 Py_INCREF(data); self->data = data;
2483 } else {
2484 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002485 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2486 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002487 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002488 /* expat often generates single character data sections; handle
2489 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002490 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2491 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002492 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002493 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002494 } else if (PyList_CheckExact(self->data)) {
2495 if (PyList_Append(self->data, data) < 0)
2496 return NULL;
2497 } else {
2498 PyObject* list = PyList_New(2);
2499 if (!list)
2500 return NULL;
2501 PyList_SET_ITEM(list, 0, self->data);
2502 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2503 self->data = list;
2504 }
2505 }
2506
2507 Py_RETURN_NONE;
2508}
2509
2510LOCAL(PyObject*)
2511treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2512{
2513 PyObject* item;
2514
2515 if (self->data) {
2516 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002517 if (treebuilder_set_element_text(self->last, self->data))
2518 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002520 if (treebuilder_set_element_tail(self->last, self->data))
2521 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002522 }
2523 self->data = NULL;
2524 }
2525
2526 if (self->index == 0) {
2527 PyErr_SetString(
2528 PyExc_IndexError,
2529 "pop from empty stack"
2530 );
2531 return NULL;
2532 }
2533
2534 self->index--;
2535
2536 item = PyList_GET_ITEM(self->stack, self->index);
2537 Py_INCREF(item);
2538
2539 Py_DECREF(self->last);
2540
Antoine Pitrouee329312012-10-04 19:53:29 +02002541 self->last = self->this;
2542 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002543
2544 if (self->end_event_obj) {
2545 PyObject* res;
2546 PyObject* action = self->end_event_obj;
2547 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002548 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002549 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002550 PyList_Append(self->events, res);
2551 Py_DECREF(res);
2552 } else
2553 PyErr_Clear(); /* FIXME: propagate error */
2554 }
2555
2556 Py_INCREF(self->last);
2557 return (PyObject*) self->last;
2558}
2559
2560LOCAL(void)
2561treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002562 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002563{
2564 PyObject* res;
2565 PyObject* action;
2566 PyObject* parcel;
2567
2568 if (!self->events)
2569 return;
2570
2571 if (start) {
2572 if (!self->start_ns_event_obj)
2573 return;
2574 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002575 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002576 if (!parcel)
2577 return;
2578 Py_INCREF(action);
2579 } else {
2580 if (!self->end_ns_event_obj)
2581 return;
2582 action = self->end_ns_event_obj;
2583 Py_INCREF(action);
2584 parcel = Py_None;
2585 Py_INCREF(parcel);
2586 }
2587
2588 res = PyTuple_New(2);
2589
2590 if (res) {
2591 PyTuple_SET_ITEM(res, 0, action);
2592 PyTuple_SET_ITEM(res, 1, parcel);
2593 PyList_Append(self->events, res);
2594 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002595 }
2596 else {
2597 Py_DECREF(action);
2598 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002599 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002600 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002601}
2602
2603/* -------------------------------------------------------------------- */
2604/* methods (in alphabetical order) */
2605
2606static PyObject*
2607treebuilder_data(TreeBuilderObject* self, PyObject* args)
2608{
2609 PyObject* data;
2610 if (!PyArg_ParseTuple(args, "O:data", &data))
2611 return NULL;
2612
2613 return treebuilder_handle_data(self, data);
2614}
2615
2616static PyObject*
2617treebuilder_end(TreeBuilderObject* self, PyObject* args)
2618{
2619 PyObject* tag;
2620 if (!PyArg_ParseTuple(args, "O:end", &tag))
2621 return NULL;
2622
2623 return treebuilder_handle_end(self, tag);
2624}
2625
2626LOCAL(PyObject*)
2627treebuilder_done(TreeBuilderObject* self)
2628{
2629 PyObject* res;
2630
2631 /* FIXME: check stack size? */
2632
2633 if (self->root)
2634 res = self->root;
2635 else
2636 res = Py_None;
2637
2638 Py_INCREF(res);
2639 return res;
2640}
2641
2642static PyObject*
2643treebuilder_close(TreeBuilderObject* self, PyObject* args)
2644{
2645 if (!PyArg_ParseTuple(args, ":close"))
2646 return NULL;
2647
2648 return treebuilder_done(self);
2649}
2650
2651static PyObject*
2652treebuilder_start(TreeBuilderObject* self, PyObject* args)
2653{
2654 PyObject* tag;
2655 PyObject* attrib = Py_None;
2656 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2657 return NULL;
2658
2659 return treebuilder_handle_start(self, tag, attrib);
2660}
2661
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002662static PyMethodDef treebuilder_methods[] = {
2663 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2664 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2665 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002666 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2667 {NULL, NULL}
2668};
2669
Neal Norwitz227b5332006-03-22 09:28:35 +00002670static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002671 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002672 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002673 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002674 (destructor)treebuilder_dealloc, /* tp_dealloc */
2675 0, /* tp_print */
2676 0, /* tp_getattr */
2677 0, /* tp_setattr */
2678 0, /* tp_reserved */
2679 0, /* tp_repr */
2680 0, /* tp_as_number */
2681 0, /* tp_as_sequence */
2682 0, /* tp_as_mapping */
2683 0, /* tp_hash */
2684 0, /* tp_call */
2685 0, /* tp_str */
2686 0, /* tp_getattro */
2687 0, /* tp_setattro */
2688 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002689 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2690 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002691 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002692 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2693 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002694 0, /* tp_richcompare */
2695 0, /* tp_weaklistoffset */
2696 0, /* tp_iter */
2697 0, /* tp_iternext */
2698 treebuilder_methods, /* tp_methods */
2699 0, /* tp_members */
2700 0, /* tp_getset */
2701 0, /* tp_base */
2702 0, /* tp_dict */
2703 0, /* tp_descr_get */
2704 0, /* tp_descr_set */
2705 0, /* tp_dictoffset */
2706 (initproc)treebuilder_init, /* tp_init */
2707 PyType_GenericAlloc, /* tp_alloc */
2708 treebuilder_new, /* tp_new */
2709 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710};
2711
2712/* ==================================================================== */
2713/* the expat interface */
2714
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002715#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002717
2718/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2719 * cached globally without being in per-module state.
2720 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002721static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002723
Eli Bendersky52467b12012-06-01 07:13:08 +03002724static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2725 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2726
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002727typedef struct {
2728 PyObject_HEAD
2729
2730 XML_Parser parser;
2731
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002732 PyObject *target;
2733 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002735 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002737 PyObject *handle_start;
2738 PyObject *handle_data;
2739 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002740
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002741 PyObject *handle_comment;
2742 PyObject *handle_pi;
2743 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002745 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002746
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002747} XMLParserObject;
2748
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002749#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2750
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002751/* helpers */
2752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753LOCAL(PyObject*)
2754makeuniversal(XMLParserObject* self, const char* string)
2755{
2756 /* convert a UTF-8 tag/attribute name from the expat parser
2757 to a universal name string */
2758
Antoine Pitrouc1948842012-10-01 23:40:37 +02002759 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002760 PyObject* key;
2761 PyObject* value;
2762
2763 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002764 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002765 if (!key)
2766 return NULL;
2767
2768 value = PyDict_GetItem(self->names, key);
2769
2770 if (value) {
2771 Py_INCREF(value);
2772 } else {
2773 /* new name. convert to universal name, and decode as
2774 necessary */
2775
2776 PyObject* tag;
2777 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002778 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002779
2780 /* look for namespace separator */
2781 for (i = 0; i < size; i++)
2782 if (string[i] == '}')
2783 break;
2784 if (i != size) {
2785 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002786 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002787 if (tag == NULL) {
2788 Py_DECREF(key);
2789 return NULL;
2790 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002791 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002792 p[0] = '{';
2793 memcpy(p+1, string, size);
2794 size++;
2795 } else {
2796 /* plain name; use key as tag */
2797 Py_INCREF(key);
2798 tag = key;
2799 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002800
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002801 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002802 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002803 value = PyUnicode_DecodeUTF8(p, size, "strict");
2804 Py_DECREF(tag);
2805 if (!value) {
2806 Py_DECREF(key);
2807 return NULL;
2808 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002809
2810 /* add to names dictionary */
2811 if (PyDict_SetItem(self->names, key, value) < 0) {
2812 Py_DECREF(key);
2813 Py_DECREF(value);
2814 return NULL;
2815 }
2816 }
2817
2818 Py_DECREF(key);
2819 return value;
2820}
2821
Eli Bendersky5b77d812012-03-16 08:20:05 +02002822/* Set the ParseError exception with the given parameters.
2823 * If message is not NULL, it's used as the error string. Otherwise, the
2824 * message string is the default for the given error_code.
2825*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002826static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002827expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002828{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002829 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002830 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002831
Victor Stinner499dfcf2011-03-21 13:26:24 +01002832 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002833 message ? message : EXPAT(ErrorString)(error_code),
2834 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002835 if (errmsg == NULL)
2836 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002837
Eli Bendersky532d03e2013-08-10 08:00:39 -07002838 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002839 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002840 if (!error)
2841 return;
2842
Eli Bendersky5b77d812012-03-16 08:20:05 +02002843 /* Add code and position attributes */
2844 code = PyLong_FromLong((long)error_code);
2845 if (!code) {
2846 Py_DECREF(error);
2847 return;
2848 }
2849 if (PyObject_SetAttrString(error, "code", code) == -1) {
2850 Py_DECREF(error);
2851 Py_DECREF(code);
2852 return;
2853 }
2854 Py_DECREF(code);
2855
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002856 position = Py_BuildValue("(ii)", line, column);
2857 if (!position) {
2858 Py_DECREF(error);
2859 return;
2860 }
2861 if (PyObject_SetAttrString(error, "position", position) == -1) {
2862 Py_DECREF(error);
2863 Py_DECREF(position);
2864 return;
2865 }
2866 Py_DECREF(position);
2867
Eli Bendersky532d03e2013-08-10 08:00:39 -07002868 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002869 Py_DECREF(error);
2870}
2871
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002872/* -------------------------------------------------------------------- */
2873/* handlers */
2874
2875static void
2876expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2877 int data_len)
2878{
2879 PyObject* key;
2880 PyObject* value;
2881 PyObject* res;
2882
2883 if (data_len < 2 || data_in[0] != '&')
2884 return;
2885
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002886 if (PyErr_Occurred())
2887 return;
2888
Neal Norwitz0269b912007-08-08 06:56:02 +00002889 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002890 if (!key)
2891 return;
2892
2893 value = PyDict_GetItem(self->entity, key);
2894
2895 if (value) {
2896 if (TreeBuilder_CheckExact(self->target))
2897 res = treebuilder_handle_data(
2898 (TreeBuilderObject*) self->target, value
2899 );
2900 else if (self->handle_data)
2901 res = PyObject_CallFunction(self->handle_data, "O", value);
2902 else
2903 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002904 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002905 } else if (!PyErr_Occurred()) {
2906 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002907 char message[128] = "undefined entity ";
2908 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002909 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002910 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002911 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002912 EXPAT(GetErrorColumnNumber)(self->parser),
2913 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002914 );
2915 }
2916
2917 Py_DECREF(key);
2918}
2919
2920static void
2921expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2922 const XML_Char **attrib_in)
2923{
2924 PyObject* res;
2925 PyObject* tag;
2926 PyObject* attrib;
2927 int ok;
2928
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002929 if (PyErr_Occurred())
2930 return;
2931
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002932 /* tag name */
2933 tag = makeuniversal(self, tag_in);
2934 if (!tag)
2935 return; /* parser will look for errors */
2936
2937 /* attributes */
2938 if (attrib_in[0]) {
2939 attrib = PyDict_New();
2940 if (!attrib)
2941 return;
2942 while (attrib_in[0] && attrib_in[1]) {
2943 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002944 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002945 if (!key || !value) {
2946 Py_XDECREF(value);
2947 Py_XDECREF(key);
2948 Py_DECREF(attrib);
2949 return;
2950 }
2951 ok = PyDict_SetItem(attrib, key, value);
2952 Py_DECREF(value);
2953 Py_DECREF(key);
2954 if (ok < 0) {
2955 Py_DECREF(attrib);
2956 return;
2957 }
2958 attrib_in += 2;
2959 }
2960 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002961 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002962 attrib = PyDict_New();
2963 if (!attrib)
2964 return;
2965 }
2966
2967 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002968 /* shortcut */
2969 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2970 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002971 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002972 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002973 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002974 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002975 res = NULL;
2976
2977 Py_DECREF(tag);
2978 Py_DECREF(attrib);
2979
2980 Py_XDECREF(res);
2981}
2982
2983static void
2984expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2985 int data_len)
2986{
2987 PyObject* data;
2988 PyObject* res;
2989
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002990 if (PyErr_Occurred())
2991 return;
2992
Neal Norwitz0269b912007-08-08 06:56:02 +00002993 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002994 if (!data)
2995 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002996
2997 if (TreeBuilder_CheckExact(self->target))
2998 /* shortcut */
2999 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3000 else if (self->handle_data)
3001 res = PyObject_CallFunction(self->handle_data, "O", data);
3002 else
3003 res = NULL;
3004
3005 Py_DECREF(data);
3006
3007 Py_XDECREF(res);
3008}
3009
3010static void
3011expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3012{
3013 PyObject* tag;
3014 PyObject* res = NULL;
3015
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003016 if (PyErr_Occurred())
3017 return;
3018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003019 if (TreeBuilder_CheckExact(self->target))
3020 /* shortcut */
3021 /* the standard tree builder doesn't look at the end tag */
3022 res = treebuilder_handle_end(
3023 (TreeBuilderObject*) self->target, Py_None
3024 );
3025 else if (self->handle_end) {
3026 tag = makeuniversal(self, tag_in);
3027 if (tag) {
3028 res = PyObject_CallFunction(self->handle_end, "O", tag);
3029 Py_DECREF(tag);
3030 }
3031 }
3032
3033 Py_XDECREF(res);
3034}
3035
3036static void
3037expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3038 const XML_Char *uri)
3039{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003040 PyObject* sprefix = NULL;
3041 PyObject* suri = NULL;
3042
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003043 if (PyErr_Occurred())
3044 return;
3045
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003046 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
3047 if (!suri)
3048 return;
3049
3050 if (prefix)
3051 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3052 else
3053 sprefix = PyUnicode_FromString("");
3054 if (!sprefix) {
3055 Py_DECREF(suri);
3056 return;
3057 }
3058
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003059 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003060 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003061 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003062
3063 Py_DECREF(sprefix);
3064 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003065}
3066
3067static void
3068expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3069{
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003070 if (PyErr_Occurred())
3071 return;
3072
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003073 treebuilder_handle_namespace(
3074 (TreeBuilderObject*) self->target, 0, NULL, NULL
3075 );
3076}
3077
3078static void
3079expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3080{
3081 PyObject* comment;
3082 PyObject* res;
3083
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003084 if (PyErr_Occurred())
3085 return;
3086
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003087 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003088 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003089 if (comment) {
3090 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3091 Py_XDECREF(res);
3092 Py_DECREF(comment);
3093 }
3094 }
3095}
3096
Eli Bendersky45839902013-01-13 05:14:47 -08003097static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003098expat_start_doctype_handler(XMLParserObject *self,
3099 const XML_Char *doctype_name,
3100 const XML_Char *sysid,
3101 const XML_Char *pubid,
3102 int has_internal_subset)
3103{
3104 PyObject *self_pyobj = (PyObject *)self;
3105 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3106 PyObject *parser_doctype = NULL;
3107 PyObject *res = NULL;
3108
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003109 if (PyErr_Occurred())
3110 return;
3111
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003112 doctype_name_obj = makeuniversal(self, doctype_name);
3113 if (!doctype_name_obj)
3114 return;
3115
3116 if (sysid) {
3117 sysid_obj = makeuniversal(self, sysid);
3118 if (!sysid_obj) {
3119 Py_DECREF(doctype_name_obj);
3120 return;
3121 }
3122 } else {
3123 Py_INCREF(Py_None);
3124 sysid_obj = Py_None;
3125 }
3126
3127 if (pubid) {
3128 pubid_obj = makeuniversal(self, pubid);
3129 if (!pubid_obj) {
3130 Py_DECREF(doctype_name_obj);
3131 Py_DECREF(sysid_obj);
3132 return;
3133 }
3134 } else {
3135 Py_INCREF(Py_None);
3136 pubid_obj = Py_None;
3137 }
3138
3139 /* If the target has a handler for doctype, call it. */
3140 if (self->handle_doctype) {
3141 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3142 doctype_name_obj, pubid_obj, sysid_obj);
3143 Py_CLEAR(res);
3144 }
3145
3146 /* Now see if the parser itself has a doctype method. If yes and it's
3147 * a subclass, call it but warn about deprecation. If it's not a subclass
3148 * (i.e. vanilla XMLParser), do nothing.
3149 */
3150 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3151 if (parser_doctype) {
3152 if (!XMLParser_CheckExact(self_pyobj)) {
3153 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3154 "This method of XMLParser is deprecated. Define"
3155 " doctype() method on the TreeBuilder target.",
3156 1) < 0) {
3157 goto clear;
3158 }
3159 res = PyObject_CallFunction(parser_doctype, "OOO",
3160 doctype_name_obj, pubid_obj, sysid_obj);
3161 Py_CLEAR(res);
3162 }
3163 }
3164
3165clear:
3166 Py_XDECREF(parser_doctype);
3167 Py_DECREF(doctype_name_obj);
3168 Py_DECREF(pubid_obj);
3169 Py_DECREF(sysid_obj);
3170}
3171
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003172static void
3173expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3174 const XML_Char* data_in)
3175{
3176 PyObject* target;
3177 PyObject* data;
3178 PyObject* res;
3179
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003180 if (PyErr_Occurred())
3181 return;
3182
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003183 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003184 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3185 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003186 if (target && data) {
3187 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3188 Py_XDECREF(res);
3189 Py_DECREF(data);
3190 Py_DECREF(target);
3191 } else {
3192 Py_XDECREF(data);
3193 Py_XDECREF(target);
3194 }
3195 }
3196}
3197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003198/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003199
Eli Bendersky52467b12012-06-01 07:13:08 +03003200static PyObject *
3201xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003202{
Eli Bendersky52467b12012-06-01 07:13:08 +03003203 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3204 if (self) {
3205 self->parser = NULL;
3206 self->target = self->entity = self->names = NULL;
3207 self->handle_start = self->handle_data = self->handle_end = NULL;
3208 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003209 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003210 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003211 return (PyObject *)self;
3212}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003213
Eli Bendersky52467b12012-06-01 07:13:08 +03003214static int
3215xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3216{
3217 XMLParserObject *self_xp = (XMLParserObject *)self;
3218 PyObject *target = NULL, *html = NULL;
3219 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003220 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003221
Eli Bendersky52467b12012-06-01 07:13:08 +03003222 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3223 &html, &target, &encoding)) {
3224 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003225 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003226
Eli Bendersky52467b12012-06-01 07:13:08 +03003227 self_xp->entity = PyDict_New();
3228 if (!self_xp->entity)
3229 return -1;
3230
3231 self_xp->names = PyDict_New();
3232 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003233 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003234 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003235 }
3236
Eli Bendersky52467b12012-06-01 07:13:08 +03003237 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3238 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003239 Py_CLEAR(self_xp->entity);
3240 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003241 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003242 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003243 }
3244
Eli Bendersky52467b12012-06-01 07:13:08 +03003245 if (target) {
3246 Py_INCREF(target);
3247 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003248 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003249 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003250 Py_CLEAR(self_xp->entity);
3251 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003252 EXPAT(ParserFree)(self_xp->parser);
3253 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003254 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003255 }
3256 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003257
Eli Bendersky52467b12012-06-01 07:13:08 +03003258 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3259 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3260 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3261 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3262 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3263 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003264 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003265
3266 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003267
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003268 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003269 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003270 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003271 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003272 (XML_StartElementHandler) expat_start_handler,
3273 (XML_EndElementHandler) expat_end_handler
3274 );
3275 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003276 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003277 (XML_DefaultHandler) expat_default_handler
3278 );
3279 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003280 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003281 (XML_CharacterDataHandler) expat_data_handler
3282 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003283 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003285 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286 (XML_CommentHandler) expat_comment_handler
3287 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003288 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003289 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003290 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003291 (XML_ProcessingInstructionHandler) expat_pi_handler
3292 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003293 EXPAT(SetStartDoctypeDeclHandler)(
3294 self_xp->parser,
3295 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3296 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003297 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003298 self_xp->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003299 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003300 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003301
Eli Bendersky52467b12012-06-01 07:13:08 +03003302 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003303}
3304
Eli Bendersky52467b12012-06-01 07:13:08 +03003305static int
3306xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3307{
3308 Py_VISIT(self->handle_close);
3309 Py_VISIT(self->handle_pi);
3310 Py_VISIT(self->handle_comment);
3311 Py_VISIT(self->handle_end);
3312 Py_VISIT(self->handle_data);
3313 Py_VISIT(self->handle_start);
3314
3315 Py_VISIT(self->target);
3316 Py_VISIT(self->entity);
3317 Py_VISIT(self->names);
3318
3319 return 0;
3320}
3321
3322static int
3323xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003324{
3325 EXPAT(ParserFree)(self->parser);
3326
Antoine Pitrouc1948842012-10-01 23:40:37 +02003327 Py_CLEAR(self->handle_close);
3328 Py_CLEAR(self->handle_pi);
3329 Py_CLEAR(self->handle_comment);
3330 Py_CLEAR(self->handle_end);
3331 Py_CLEAR(self->handle_data);
3332 Py_CLEAR(self->handle_start);
3333 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003334
Antoine Pitrouc1948842012-10-01 23:40:37 +02003335 Py_CLEAR(self->target);
3336 Py_CLEAR(self->entity);
3337 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003338
Eli Bendersky52467b12012-06-01 07:13:08 +03003339 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003340}
3341
Eli Bendersky52467b12012-06-01 07:13:08 +03003342static void
3343xmlparser_dealloc(XMLParserObject* self)
3344{
3345 PyObject_GC_UnTrack(self);
3346 xmlparser_gc_clear(self);
3347 Py_TYPE(self)->tp_free((PyObject *)self);
3348}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003349
3350LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003351expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003352{
3353 int ok;
3354
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003355 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003356 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3357
3358 if (PyErr_Occurred())
3359 return NULL;
3360
3361 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003362 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003363 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003364 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003365 EXPAT(GetErrorColumnNumber)(self->parser),
3366 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003367 );
3368 return NULL;
3369 }
3370
3371 Py_RETURN_NONE;
3372}
3373
3374static PyObject*
3375xmlparser_close(XMLParserObject* self, PyObject* args)
3376{
3377 /* end feeding data to parser */
3378
3379 PyObject* res;
3380 if (!PyArg_ParseTuple(args, ":close"))
3381 return NULL;
3382
3383 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003384 if (!res)
3385 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003386
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003387 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003388 Py_DECREF(res);
3389 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003390 }
3391 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003392 Py_DECREF(res);
3393 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003394 }
3395 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003396 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003397 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003398}
3399
3400static PyObject*
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003401xmlparser_feed(XMLParserObject* self, PyObject* arg)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003402{
3403 /* feed data to parser */
3404
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003405 if (PyUnicode_Check(arg)) {
3406 Py_ssize_t data_len;
3407 const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len);
3408 if (data == NULL)
3409 return NULL;
3410 if (data_len > INT_MAX) {
3411 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3412 return NULL;
3413 }
3414 /* Explicitly set UTF-8 encoding. Return code ignored. */
3415 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3416 return expat_parse(self, data, (int)data_len, 0);
3417 }
3418 else {
3419 Py_buffer view;
3420 PyObject *res;
3421 if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0)
3422 return NULL;
3423 if (view.len > INT_MAX) {
3424 PyBuffer_Release(&view);
3425 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3426 return NULL;
3427 }
3428 res = expat_parse(self, view.buf, (int)view.len, 0);
3429 PyBuffer_Release(&view);
3430 return res;
3431 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003432}
3433
3434static PyObject*
Eli Benderskya3699232013-05-19 18:47:23 -07003435xmlparser_parse_whole(XMLParserObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003436{
Eli Benderskya3699232013-05-19 18:47:23 -07003437 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003438 PyObject* reader;
3439 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003440 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003441 PyObject* res;
3442
3443 PyObject* fileobj;
3444 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3445 return NULL;
3446
3447 reader = PyObject_GetAttrString(fileobj, "read");
3448 if (!reader)
3449 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003450
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003451 /* read from open file object */
3452 for (;;) {
3453
3454 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3455
3456 if (!buffer) {
3457 /* read failed (e.g. due to KeyboardInterrupt) */
3458 Py_DECREF(reader);
3459 return NULL;
3460 }
3461
Eli Benderskyf996e772012-03-16 05:53:30 +02003462 if (PyUnicode_CheckExact(buffer)) {
3463 /* A unicode object is encoded into bytes using UTF-8 */
3464 if (PyUnicode_GET_SIZE(buffer) == 0) {
3465 Py_DECREF(buffer);
3466 break;
3467 }
3468 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003469 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003470 if (!temp) {
3471 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003472 Py_DECREF(reader);
3473 return NULL;
3474 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003475 buffer = temp;
3476 }
3477 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003478 Py_DECREF(buffer);
3479 break;
3480 }
3481
3482 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003483 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003484 );
3485
3486 Py_DECREF(buffer);
3487
3488 if (!res) {
3489 Py_DECREF(reader);
3490 return NULL;
3491 }
3492 Py_DECREF(res);
3493
3494 }
3495
3496 Py_DECREF(reader);
3497
3498 res = expat_parse(self, "", 0, 1);
3499
3500 if (res && TreeBuilder_CheckExact(self->target)) {
3501 Py_DECREF(res);
3502 return treebuilder_done((TreeBuilderObject*) self->target);
3503 }
3504
3505 return res;
3506}
3507
3508static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003509xmlparser_doctype(XMLParserObject *self, PyObject *args)
3510{
3511 Py_RETURN_NONE;
3512}
3513
3514static PyObject*
3515xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003516{
3517 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003518 Py_ssize_t i, seqlen;
3519 TreeBuilderObject *target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003520
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003521 PyObject *events_queue;
3522 PyObject *events_to_report = Py_None;
3523 PyObject *events_seq;
3524 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events_queue,
3525 &events_to_report))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003526 return NULL;
3527
3528 if (!TreeBuilder_CheckExact(self->target)) {
3529 PyErr_SetString(
3530 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003531 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003532 "targets"
3533 );
3534 return NULL;
3535 }
3536
3537 target = (TreeBuilderObject*) self->target;
3538
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003539 Py_INCREF(events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003540 Py_XDECREF(target->events);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003541 target->events = events_queue;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003542
3543 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003544 Py_CLEAR(target->start_event_obj);
3545 Py_CLEAR(target->end_event_obj);
3546 Py_CLEAR(target->start_ns_event_obj);
3547 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003548
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003549 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003550 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003551 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003552 Py_RETURN_NONE;
3553 }
3554
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003555 if (!(events_seq = PySequence_Fast(events_to_report,
3556 "events must be a sequence"))) {
3557 return NULL;
3558 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003559
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003560 seqlen = PySequence_Size(events_seq);
3561 for (i = 0; i < seqlen; ++i) {
3562 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3563 char *event_name = NULL;
3564 if (PyUnicode_Check(event_name_obj)) {
3565 event_name = _PyUnicode_AsString(event_name_obj);
3566 } else if (PyBytes_Check(event_name_obj)) {
3567 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003568 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003569
3570 if (event_name == NULL) {
3571 Py_DECREF(events_seq);
3572 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3573 return NULL;
3574 } else if (strcmp(event_name, "start") == 0) {
3575 Py_INCREF(event_name_obj);
3576 target->start_event_obj = event_name_obj;
3577 } else if (strcmp(event_name, "end") == 0) {
3578 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003579 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003580 target->end_event_obj = event_name_obj;
3581 } else if (strcmp(event_name, "start-ns") == 0) {
3582 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003583 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003584 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003585 EXPAT(SetNamespaceDeclHandler)(
3586 self->parser,
3587 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3588 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3589 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003590 } else if (strcmp(event_name, "end-ns") == 0) {
3591 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003592 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003593 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003594 EXPAT(SetNamespaceDeclHandler)(
3595 self->parser,
3596 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3597 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3598 );
3599 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003600 Py_DECREF(events_seq);
3601 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003602 return NULL;
3603 }
3604 }
3605
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003606 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003607 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003608}
3609
3610static PyMethodDef xmlparser_methods[] = {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003611 {"feed", (PyCFunction) xmlparser_feed, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003612 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
Eli Benderskya3699232013-05-19 18:47:23 -07003613 {"_parse_whole", (PyCFunction) xmlparser_parse_whole, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003614 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003615 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003616 {NULL, NULL}
3617};
3618
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003619static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003620xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003621{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003622 if (PyUnicode_Check(nameobj)) {
3623 PyObject* res;
3624 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3625 res = self->entity;
3626 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3627 res = self->target;
3628 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3629 return PyUnicode_FromFormat(
3630 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003631 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003632 }
3633 else
3634 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003635
Alexander Belopolskye239d232010-12-08 23:31:48 +00003636 Py_INCREF(res);
3637 return res;
3638 }
3639 generic:
3640 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003641}
3642
Neal Norwitz227b5332006-03-22 09:28:35 +00003643static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003644 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003645 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003646 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003647 (destructor)xmlparser_dealloc, /* tp_dealloc */
3648 0, /* tp_print */
3649 0, /* tp_getattr */
3650 0, /* tp_setattr */
3651 0, /* tp_reserved */
3652 0, /* tp_repr */
3653 0, /* tp_as_number */
3654 0, /* tp_as_sequence */
3655 0, /* tp_as_mapping */
3656 0, /* tp_hash */
3657 0, /* tp_call */
3658 0, /* tp_str */
3659 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3660 0, /* tp_setattro */
3661 0, /* tp_as_buffer */
3662 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3663 /* tp_flags */
3664 0, /* tp_doc */
3665 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3666 (inquiry)xmlparser_gc_clear, /* tp_clear */
3667 0, /* tp_richcompare */
3668 0, /* tp_weaklistoffset */
3669 0, /* tp_iter */
3670 0, /* tp_iternext */
3671 xmlparser_methods, /* tp_methods */
3672 0, /* tp_members */
3673 0, /* tp_getset */
3674 0, /* tp_base */
3675 0, /* tp_dict */
3676 0, /* tp_descr_get */
3677 0, /* tp_descr_set */
3678 0, /* tp_dictoffset */
3679 (initproc)xmlparser_init, /* tp_init */
3680 PyType_GenericAlloc, /* tp_alloc */
3681 xmlparser_new, /* tp_new */
3682 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003683};
3684
/* ==================================================================== */
/* python module interface */
3687
3688static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003689 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003690 {NULL, NULL}
3691};
3692
Martin v. Löwis1a214512008-06-11 05:26:20 +00003693
Eli Bendersky532d03e2013-08-10 08:00:39 -07003694static struct PyModuleDef elementtreemodule = {
3695 PyModuleDef_HEAD_INIT,
3696 "_elementtree",
3697 NULL,
3698 sizeof(elementtreestate),
3699 _functions,
3700 NULL,
3701 elementtree_traverse,
3702 elementtree_clear,
3703 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003704};
3705
Neal Norwitzf6657e62006-12-28 04:47:50 +00003706PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003707PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003708{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003709 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003710 elementtreestate *st;
3711
3712 m = PyState_FindModule(&elementtreemodule);
3713 if (m) {
3714 Py_INCREF(m);
3715 return m;
3716 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003717
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003718 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003719 if (PyType_Ready(&ElementIter_Type) < 0)
3720 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003721 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003722 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003723 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003724 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003725 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003726 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003727
Eli Bendersky532d03e2013-08-10 08:00:39 -07003728 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003729 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003730 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003731 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003732
Eli Bendersky828efde2012-04-05 05:40:58 +03003733 if (!(temp = PyImport_ImportModule("copy")))
3734 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003735 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003736 Py_XDECREF(temp);
3737
Eli Bendersky532d03e2013-08-10 08:00:39 -07003738 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003739 return NULL;
3740
Eli Bendersky20d41742012-06-01 09:48:37 +03003741 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003742 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3743 if (expat_capi) {
3744 /* check that it's usable */
3745 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3746 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3747 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3748 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003749 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003750 PyErr_SetString(PyExc_ImportError,
3751 "pyexpat version is incompatible");
3752 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003753 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003754 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003755 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003756 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003757
Eli Bendersky532d03e2013-08-10 08:00:39 -07003758 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003759 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003760 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003761 Py_INCREF(st->parseerror_obj);
3762 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003763
Eli Bendersky092af1f2012-03-04 07:14:03 +02003764 Py_INCREF((PyObject *)&Element_Type);
3765 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3766
Eli Bendersky58d548d2012-05-29 15:45:16 +03003767 Py_INCREF((PyObject *)&TreeBuilder_Type);
3768 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3769
Eli Bendersky52467b12012-06-01 07:13:08 +03003770 Py_INCREF((PyObject *)&XMLParser_Type);
3771 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003772
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003773 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003774}