blob: e886ae939fb0df236ab7fd2d9eb380e34be645c5 [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
14#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030015#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016
/* -------------------------------------------------------------------- */
/* configuration */

/* Number of children an element can hold without extra memory
   allocations.

   For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes).

   Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
#define STATIC_CHILDREN 4

/* -------------------------------------------------------------------- */
35
/* Optional allocation tracing.  With the first branch enabled, ALLOC and
   RELEASE keep a running byte count and print it together with the
   given comment; in the default configuration both expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) introduces a file-local function returning
   'type'; under MSVC it also asks for inlining and __fastcall */
#ifndef _MSC_VER
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
53
/* Macros used to store a 'join' flag in the lowest bit of a string object
   pointer.  Every use of text and tail as object pointers must go through
   JOIN_OBJ; see the comments in the ElementObject definition for more
   info.

   JOIN_OBJ(p)        the object pointer with the flag bit masked off
   JOIN_GET(p)        the flag bit (0 or 1)
   JOIN_SET(p, flag)  the pointer of p combined with the given flag */
#define JOIN_OBJ(p) ((PyObject*) (((Py_uintptr_t) (p)) & ~((Py_uintptr_t) 1)))
#define JOIN_GET(p) (((Py_uintptr_t) (p)) & 1)
#define JOIN_SET(p, flag) ((void*) (((Py_uintptr_t) JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061
Ronald Oussoren138d0802013-07-19 11:11:25 +020062/* Types defined by this extension */
63static PyTypeObject Element_Type;
64static PyTypeObject ElementIter_Type;
65static PyTypeObject TreeBuilder_Type;
66static PyTypeObject XMLParser_Type;
67
68
Eli Bendersky532d03e2013-08-10 08:00:39 -070069/* Per-module state; PEP 3121 */
70typedef struct {
71 PyObject *parseerror_obj;
72 PyObject *deepcopy_obj;
73 PyObject *elementpath_obj;
74} elementtreestate;
75
76static struct PyModuleDef elementtreemodule;
77
78/* Given a module object (assumed to be _elementtree), get its per-module
79 * state.
80 */
81#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
82
83/* Find the module instance imported in the currently running sub-interpreter
84 * and get its state.
85 */
86#define ET_STATE_GLOBAL \
87 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
88
89static int
90elementtree_clear(PyObject *m)
91{
92 elementtreestate *st = ET_STATE(m);
93 Py_CLEAR(st->parseerror_obj);
94 Py_CLEAR(st->deepcopy_obj);
95 Py_CLEAR(st->elementpath_obj);
96 return 0;
97}
98
99static int
100elementtree_traverse(PyObject *m, visitproc visit, void *arg)
101{
102 elementtreestate *st = ET_STATE(m);
103 Py_VISIT(st->parseerror_obj);
104 Py_VISIT(st->deepcopy_obj);
105 Py_VISIT(st->elementpath_obj);
106 return 0;
107}
108
109static void
110elementtree_free(void *m)
111{
112 elementtree_clear((PyObject *)m);
113}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000114
115/* helpers */
116
117LOCAL(PyObject*)
118deepcopy(PyObject* object, PyObject* memo)
119{
120 /* do a deep copy of the given object */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000121 PyObject* args;
122 PyObject* result;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700123 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000124
Eli Bendersky532d03e2013-08-10 08:00:39 -0700125 if (!st->deepcopy_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126 PyErr_SetString(
127 PyExc_RuntimeError,
128 "deepcopy helper not found"
129 );
130 return NULL;
131 }
132
Antoine Pitrouc1948842012-10-01 23:40:37 +0200133 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000134 if (!args)
135 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700136 result = PyObject_CallObject(st->deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000137 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138 return result;
139}
140
141LOCAL(PyObject*)
142list_join(PyObject* list)
143{
144 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000146 PyObject* result;
147
Antoine Pitrouc1948842012-10-01 23:40:37 +0200148 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 if (!joiner)
150 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200151 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200153 if (result)
154 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155 return result;
156}
157
Eli Bendersky48d358b2012-05-30 17:57:50 +0300158/* Is the given object an empty dictionary?
159*/
160static int
161is_empty_dict(PyObject *obj)
162{
163 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
164}
165
166
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000167/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200168/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000169
170typedef struct {
171
172 /* attributes (a dictionary object), or None if no attributes */
173 PyObject* attrib;
174
175 /* child elements */
176 int length; /* actual number of items */
177 int allocated; /* allocated items */
178
179 /* this either points to _children or to a malloced buffer */
180 PyObject* *children;
181
182 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100183
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000184} ElementObjectExtra;
185
186typedef struct {
187 PyObject_HEAD
188
189 /* element tag (a string). */
190 PyObject* tag;
191
192 /* text before first child. note that this is a tagged pointer;
193 use JOIN_OBJ to get the object pointer. the join flag is used
194 to distinguish lists created by the tree builder from lists
195 assigned to the attribute by application code; the former
196 should be joined before being returned to the user, the latter
197 should be left intact. */
198 PyObject* text;
199
200 /* text after this element, in parent. note that this is a tagged
201 pointer; use JOIN_OBJ to get the object pointer. */
202 PyObject* tail;
203
204 ElementObjectExtra* extra;
205
Eli Benderskyebf37a22012-04-03 22:02:37 +0300206 PyObject *weakreflist; /* For tp_weaklistoffset */
207
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000208} ElementObject;
209
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210
Christian Heimes90aa7642007-12-19 02:45:37 +0000211#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200214/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215
216LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200217create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000218{
219 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200220 if (!self->extra) {
221 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200223 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
225 if (!attrib)
226 attrib = Py_None;
227
228 Py_INCREF(attrib);
229 self->extra->attrib = attrib;
230
231 self->extra->length = 0;
232 self->extra->allocated = STATIC_CHILDREN;
233 self->extra->children = self->extra->_children;
234
235 return 0;
236}
237
238LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200239dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000240{
Eli Bendersky08b85292012-04-04 15:55:07 +0300241 ElementObjectExtra *myextra;
242 int i;
243
Eli Benderskyebf37a22012-04-03 22:02:37 +0300244 if (!self->extra)
245 return;
246
247 /* Avoid DECREFs calling into this code again (cycles, etc.)
248 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300249 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300250 self->extra = NULL;
251
252 Py_DECREF(myextra->attrib);
253
Eli Benderskyebf37a22012-04-03 22:02:37 +0300254 for (i = 0; i < myextra->length; i++)
255 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256
Eli Benderskyebf37a22012-04-03 22:02:37 +0300257 if (myextra->children != myextra->_children)
258 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000259
Eli Benderskyebf37a22012-04-03 22:02:37 +0300260 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000261}
262
Eli Bendersky092af1f2012-03-04 07:14:03 +0200263/* Convenience internal function to create new Element objects with the given
264 * tag and attributes.
265*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200267create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268{
269 ElementObject* self;
270
Eli Bendersky0192ba32012-03-30 16:38:33 +0300271 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000272 if (self == NULL)
273 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000274 self->extra = NULL;
275
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000276 Py_INCREF(tag);
277 self->tag = tag;
278
279 Py_INCREF(Py_None);
280 self->text = Py_None;
281
282 Py_INCREF(Py_None);
283 self->tail = Py_None;
284
Eli Benderskyebf37a22012-04-03 22:02:37 +0300285 self->weakreflist = NULL;
286
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200287 ALLOC(sizeof(ElementObject), "create element");
288 PyObject_GC_Track(self);
289
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200290 if (attrib != Py_None && !is_empty_dict(attrib)) {
291 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200292 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200293 return NULL;
294 }
295 }
296
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000297 return (PyObject*) self;
298}
299
Eli Bendersky092af1f2012-03-04 07:14:03 +0200300static PyObject *
301element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
302{
303 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
304 if (e != NULL) {
305 Py_INCREF(Py_None);
306 e->tag = Py_None;
307
308 Py_INCREF(Py_None);
309 e->text = Py_None;
310
311 Py_INCREF(Py_None);
312 e->tail = Py_None;
313
314 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300315 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200316 }
317 return (PyObject *)e;
318}
319
Eli Bendersky737b1732012-05-29 06:02:56 +0300320/* Helper function for extracting the attrib dictionary from a keywords dict.
321 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800322 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300323 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700324 *
325 * Return a dictionary with the content of kwds merged into the content of
326 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300327 */
328static PyObject*
329get_attrib_from_keywords(PyObject *kwds)
330{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700331 PyObject *attrib_str = PyUnicode_FromString("attrib");
332 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300333
334 if (attrib) {
335 /* If attrib was found in kwds, copy its value and remove it from
336 * kwds
337 */
338 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700339 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300340 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
341 Py_TYPE(attrib)->tp_name);
342 return NULL;
343 }
344 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700345 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300346 } else {
347 attrib = PyDict_New();
348 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700349
350 Py_DECREF(attrib_str);
351
352 /* attrib can be NULL if PyDict_New failed */
353 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200354 if (PyDict_Update(attrib, kwds) < 0)
355 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300356 return attrib;
357}
358
Eli Bendersky092af1f2012-03-04 07:14:03 +0200359static int
360element_init(PyObject *self, PyObject *args, PyObject *kwds)
361{
362 PyObject *tag;
363 PyObject *tmp;
364 PyObject *attrib = NULL;
365 ElementObject *self_elem;
366
367 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
368 return -1;
369
Eli Bendersky737b1732012-05-29 06:02:56 +0300370 if (attrib) {
371 /* attrib passed as positional arg */
372 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200373 if (!attrib)
374 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300375 if (kwds) {
376 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200377 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300378 return -1;
379 }
380 }
381 } else if (kwds) {
382 /* have keywords args */
383 attrib = get_attrib_from_keywords(kwds);
384 if (!attrib)
385 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200386 }
387
388 self_elem = (ElementObject *)self;
389
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200392 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200393 return -1;
394 }
395 }
396
Eli Bendersky48d358b2012-05-30 17:57:50 +0300397 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200398 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399
400 /* Replace the objects already pointed to by tag, text and tail. */
401 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200402 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200403 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200404 Py_DECREF(tmp);
405
406 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200407 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200408 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200409 Py_DECREF(JOIN_OBJ(tmp));
410
411 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200412 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200413 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200414 Py_DECREF(JOIN_OBJ(tmp));
415
416 return 0;
417}
418
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419LOCAL(int)
420element_resize(ElementObject* self, int extra)
421{
422 int size;
423 PyObject* *children;
424
425 /* make sure self->children can hold the given number of extra
426 elements. set an exception and return -1 if allocation failed */
427
Victor Stinner5f0af232013-07-11 23:01:36 +0200428 if (!self->extra) {
429 if (create_extra(self, NULL) < 0)
430 return -1;
431 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000432
433 size = self->extra->length + extra;
434
435 if (size > self->extra->allocated) {
436 /* use Python 2.4's list growth strategy */
437 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000438 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100439 * which needs at least 4 bytes.
440 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000441 * be safe.
442 */
443 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000445 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100446 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000447 * false alarm always assume at least one child to be safe.
448 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000449 children = PyObject_Realloc(self->extra->children,
450 size * sizeof(PyObject*));
451 if (!children)
452 goto nomemory;
453 } else {
454 children = PyObject_Malloc(size * sizeof(PyObject*));
455 if (!children)
456 goto nomemory;
457 /* copy existing children from static area to malloc buffer */
458 memcpy(children, self->extra->children,
459 self->extra->length * sizeof(PyObject*));
460 }
461 self->extra->children = children;
462 self->extra->allocated = size;
463 }
464
465 return 0;
466
467 nomemory:
468 PyErr_NoMemory();
469 return -1;
470}
471
472LOCAL(int)
473element_add_subelement(ElementObject* self, PyObject* element)
474{
475 /* add a child element to a parent */
476
477 if (element_resize(self, 1) < 0)
478 return -1;
479
480 Py_INCREF(element);
481 self->extra->children[self->extra->length] = element;
482
483 self->extra->length++;
484
485 return 0;
486}
487
488LOCAL(PyObject*)
489element_get_attrib(ElementObject* self)
490{
491 /* return borrowed reference to attrib dictionary */
492 /* note: this function assumes that the extra section exists */
493
494 PyObject* res = self->extra->attrib;
495
496 if (res == Py_None) {
497 /* create missing dictionary */
498 res = PyDict_New();
499 if (!res)
500 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200501 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000502 self->extra->attrib = res;
503 }
504
505 return res;
506}
507
508LOCAL(PyObject*)
509element_get_text(ElementObject* self)
510{
511 /* return borrowed reference to text attribute */
512
513 PyObject* res = self->text;
514
515 if (JOIN_GET(res)) {
516 res = JOIN_OBJ(res);
517 if (PyList_CheckExact(res)) {
518 res = list_join(res);
519 if (!res)
520 return NULL;
521 self->text = res;
522 }
523 }
524
525 return res;
526}
527
528LOCAL(PyObject*)
529element_get_tail(ElementObject* self)
530{
531 /* return borrowed reference to text attribute */
532
533 PyObject* res = self->tail;
534
535 if (JOIN_GET(res)) {
536 res = JOIN_OBJ(res);
537 if (PyList_CheckExact(res)) {
538 res = list_join(res);
539 if (!res)
540 return NULL;
541 self->tail = res;
542 }
543 }
544
545 return res;
546}
547
548static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300549subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000550{
551 PyObject* elem;
552
553 ElementObject* parent;
554 PyObject* tag;
555 PyObject* attrib = NULL;
556 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
557 &Element_Type, &parent, &tag,
558 &PyDict_Type, &attrib))
559 return NULL;
560
Eli Bendersky737b1732012-05-29 06:02:56 +0300561 if (attrib) {
562 /* attrib passed as positional arg */
563 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000564 if (!attrib)
565 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300566 if (kwds) {
567 if (PyDict_Update(attrib, kwds) < 0) {
568 return NULL;
569 }
570 }
571 } else if (kwds) {
572 /* have keyword args */
573 attrib = get_attrib_from_keywords(kwds);
574 if (!attrib)
575 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000576 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300577 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000578 Py_INCREF(Py_None);
579 attrib = Py_None;
580 }
581
Eli Bendersky092af1f2012-03-04 07:14:03 +0200582 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200584 if (elem == NULL)
585 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000586
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000587 if (element_add_subelement(parent, elem) < 0) {
588 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000589 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000590 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000591
592 return elem;
593}
594
Eli Bendersky0192ba32012-03-30 16:38:33 +0300595static int
596element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
597{
598 Py_VISIT(self->tag);
599 Py_VISIT(JOIN_OBJ(self->text));
600 Py_VISIT(JOIN_OBJ(self->tail));
601
602 if (self->extra) {
603 int i;
604 Py_VISIT(self->extra->attrib);
605
606 for (i = 0; i < self->extra->length; ++i)
607 Py_VISIT(self->extra->children[i]);
608 }
609 return 0;
610}
611
612static int
613element_gc_clear(ElementObject *self)
614{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300615 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300616
617 /* The following is like Py_CLEAR for self->text and self->tail, but
618 * written explicitily because the real pointers hide behind access
619 * macros.
620 */
621 if (self->text) {
622 PyObject *tmp = JOIN_OBJ(self->text);
623 self->text = NULL;
624 Py_DECREF(tmp);
625 }
626
627 if (self->tail) {
628 PyObject *tmp = JOIN_OBJ(self->tail);
629 self->tail = NULL;
630 Py_DECREF(tmp);
631 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300632
633 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300634 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300635 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300636 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300637 return 0;
638}
639
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000640static void
641element_dealloc(ElementObject* self)
642{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300643 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300644
645 if (self->weakreflist != NULL)
646 PyObject_ClearWeakRefs((PyObject *) self);
647
Eli Bendersky0192ba32012-03-30 16:38:33 +0300648 /* element_gc_clear clears all references and deallocates extra
649 */
650 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000651
652 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200653 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000654}
655
656/* -------------------------------------------------------------------- */
657/* methods (in alphabetical order) */
658
659static PyObject*
660element_append(ElementObject* self, PyObject* args)
661{
662 PyObject* element;
663 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
664 return NULL;
665
666 if (element_add_subelement(self, element) < 0)
667 return NULL;
668
669 Py_RETURN_NONE;
670}
671
672static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300673element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000674{
675 if (!PyArg_ParseTuple(args, ":clear"))
676 return NULL;
677
Eli Benderskyebf37a22012-04-03 22:02:37 +0300678 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000679
680 Py_INCREF(Py_None);
681 Py_DECREF(JOIN_OBJ(self->text));
682 self->text = Py_None;
683
684 Py_INCREF(Py_None);
685 Py_DECREF(JOIN_OBJ(self->tail));
686 self->tail = Py_None;
687
688 Py_RETURN_NONE;
689}
690
691static PyObject*
692element_copy(ElementObject* self, PyObject* args)
693{
694 int i;
695 ElementObject* element;
696
697 if (!PyArg_ParseTuple(args, ":__copy__"))
698 return NULL;
699
Eli Bendersky092af1f2012-03-04 07:14:03 +0200700 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701 self->tag, (self->extra) ? self->extra->attrib : Py_None
702 );
703 if (!element)
704 return NULL;
705
706 Py_DECREF(JOIN_OBJ(element->text));
707 element->text = self->text;
708 Py_INCREF(JOIN_OBJ(element->text));
709
710 Py_DECREF(JOIN_OBJ(element->tail));
711 element->tail = self->tail;
712 Py_INCREF(JOIN_OBJ(element->tail));
713
714 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100715
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000716 if (element_resize(element, self->extra->length) < 0) {
717 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000718 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000719 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000720
721 for (i = 0; i < self->extra->length; i++) {
722 Py_INCREF(self->extra->children[i]);
723 element->extra->children[i] = self->extra->children[i];
724 }
725
726 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100727
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000728 }
729
730 return (PyObject*) element;
731}
732
733static PyObject*
734element_deepcopy(ElementObject* self, PyObject* args)
735{
736 int i;
737 ElementObject* element;
738 PyObject* tag;
739 PyObject* attrib;
740 PyObject* text;
741 PyObject* tail;
742 PyObject* id;
743
744 PyObject* memo;
745 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
746 return NULL;
747
748 tag = deepcopy(self->tag, memo);
749 if (!tag)
750 return NULL;
751
752 if (self->extra) {
753 attrib = deepcopy(self->extra->attrib, memo);
754 if (!attrib) {
755 Py_DECREF(tag);
756 return NULL;
757 }
758 } else {
759 Py_INCREF(Py_None);
760 attrib = Py_None;
761 }
762
Eli Bendersky092af1f2012-03-04 07:14:03 +0200763 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764
765 Py_DECREF(tag);
766 Py_DECREF(attrib);
767
768 if (!element)
769 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 text = deepcopy(JOIN_OBJ(self->text), memo);
772 if (!text)
773 goto error;
774 Py_DECREF(element->text);
775 element->text = JOIN_SET(text, JOIN_GET(self->text));
776
777 tail = deepcopy(JOIN_OBJ(self->tail), memo);
778 if (!tail)
779 goto error;
780 Py_DECREF(element->tail);
781 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
782
783 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100784
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000785 if (element_resize(element, self->extra->length) < 0)
786 goto error;
787
788 for (i = 0; i < self->extra->length; i++) {
789 PyObject* child = deepcopy(self->extra->children[i], memo);
790 if (!child) {
791 element->extra->length = i;
792 goto error;
793 }
794 element->extra->children[i] = child;
795 }
796
797 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100798
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000799 }
800
801 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200802 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000803 if (!id)
804 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805
806 i = PyDict_SetItem(memo, id, (PyObject*) element);
807
808 Py_DECREF(id);
809
810 if (i < 0)
811 goto error;
812
813 return (PyObject*) element;
814
815 error:
816 Py_DECREF(element);
817 return NULL;
818}
819
Martin v. Löwisbce16662012-06-17 10:41:22 +0200820static PyObject*
821element_sizeof(PyObject* _self, PyObject* args)
822{
823 ElementObject *self = (ElementObject*)_self;
824 Py_ssize_t result = sizeof(ElementObject);
825 if (self->extra) {
826 result += sizeof(ElementObjectExtra);
827 if (self->extra->children != self->extra->_children)
828 result += sizeof(PyObject*) * self->extra->allocated;
829 }
830 return PyLong_FromSsize_t(result);
831}
832
/* Keys of the state dictionary used by getstate/setstate. */
#define PICKLED_TAG       "tag"
#define PICKLED_CHILDREN  "_children"
#define PICKLED_ATTRIB    "attrib"
#define PICKLED_TAIL      "tail"
#define PICKLED_TEXT      "text"
839
840/* __getstate__ returns a fabricated instance dict as in the pure-Python
841 * Element implementation, for interoperability/interchangeability. This
842 * makes the pure-Python implementation details an API, but (a) there aren't
843 * any unnecessary structures there; and (b) it buys compatibility with 3.2
844 * pickles. See issue #16076.
845 */
846static PyObject *
847element_getstate(ElementObject *self)
848{
849 int i, noattrib;
850 PyObject *instancedict = NULL, *children;
851
852 /* Build a list of children. */
853 children = PyList_New(self->extra ? self->extra->length : 0);
854 if (!children)
855 return NULL;
856 for (i = 0; i < PyList_GET_SIZE(children); i++) {
857 PyObject *child = self->extra->children[i];
858 Py_INCREF(child);
859 PyList_SET_ITEM(children, i, child);
860 }
861
862 /* Construct the state object. */
863 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
864 if (noattrib)
865 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
866 PICKLED_TAG, self->tag,
867 PICKLED_CHILDREN, children,
868 PICKLED_ATTRIB,
869 PICKLED_TEXT, self->text,
870 PICKLED_TAIL, self->tail);
871 else
872 instancedict = Py_BuildValue("{sOsOsOsOsO}",
873 PICKLED_TAG, self->tag,
874 PICKLED_CHILDREN, children,
875 PICKLED_ATTRIB, self->extra->attrib,
876 PICKLED_TEXT, self->text,
877 PICKLED_TAIL, self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800878 if (instancedict) {
879 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800880 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800881 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800882 else {
883 for (i = 0; i < PyList_GET_SIZE(children); i++)
884 Py_DECREF(PyList_GET_ITEM(children, i));
885 Py_DECREF(children);
886
887 return NULL;
888 }
889}
890
891static PyObject *
892element_setstate_from_attributes(ElementObject *self,
893 PyObject *tag,
894 PyObject *attrib,
895 PyObject *text,
896 PyObject *tail,
897 PyObject *children)
898{
899 Py_ssize_t i, nchildren;
900
901 if (!tag) {
902 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
903 return NULL;
904 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800905
906 Py_CLEAR(self->tag);
907 self->tag = tag;
908 Py_INCREF(self->tag);
909
910 Py_CLEAR(self->text);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800911 self->text = text ? text : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800912 Py_INCREF(self->text);
913
914 Py_CLEAR(self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800915 self->tail = tail ? tail : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800916 Py_INCREF(self->tail);
917
918 /* Handle ATTRIB and CHILDREN. */
919 if (!children && !attrib)
920 Py_RETURN_NONE;
921
922 /* Compute 'nchildren'. */
923 if (children) {
924 if (!PyList_Check(children)) {
925 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
926 return NULL;
927 }
928 nchildren = PyList_Size(children);
929 }
930 else {
931 nchildren = 0;
932 }
933
934 /* Allocate 'extra'. */
935 if (element_resize(self, nchildren)) {
936 return NULL;
937 }
938 assert(self->extra && self->extra->allocated >= nchildren);
939
940 /* Copy children */
941 for (i = 0; i < nchildren; i++) {
942 self->extra->children[i] = PyList_GET_ITEM(children, i);
943 Py_INCREF(self->extra->children[i]);
944 }
945
946 self->extra->length = nchildren;
947 self->extra->allocated = nchildren;
948
949 /* Stash attrib. */
950 if (attrib) {
951 Py_CLEAR(self->extra->attrib);
952 self->extra->attrib = attrib;
953 Py_INCREF(attrib);
954 }
955
956 Py_RETURN_NONE;
957}
958
959/* __setstate__ for Element instance from the Python implementation.
960 * 'state' should be the instance dict.
961 */
962static PyObject *
963element_setstate_from_Python(ElementObject *self, PyObject *state)
964{
965 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
966 PICKLED_TAIL, PICKLED_CHILDREN, 0};
967 PyObject *args;
968 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800969 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800970
Eli Bendersky698bdb22013-01-10 06:01:06 -0800971 tag = attrib = text = tail = children = NULL;
972 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800973 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800974 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800975
976 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
977 &attrib, &text, &tail, &children))
978 retval = element_setstate_from_attributes(self, tag, attrib, text,
979 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800980 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800981 retval = NULL;
982
983 Py_DECREF(args);
984 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800985}
986
987static PyObject *
988element_setstate(ElementObject *self, PyObject *state)
989{
990 if (!PyDict_CheckExact(state)) {
991 PyErr_Format(PyExc_TypeError,
992 "Don't know how to unpickle \"%.200R\" as an Element",
993 state);
994 return NULL;
995 }
996 else
997 return element_setstate_from_Python(self, state);
998}
999
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001000LOCAL(int)
1001checkpath(PyObject* tag)
1002{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001003 Py_ssize_t i;
1004 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001005
1006 /* check if a tag contains an xpath character */
1007
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001008#define PATHCHAR(ch) \
1009 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001010
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001011 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001012 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1013 void *data = PyUnicode_DATA(tag);
1014 unsigned int kind = PyUnicode_KIND(tag);
1015 for (i = 0; i < len; i++) {
1016 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1017 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001018 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001019 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001020 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001021 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001022 return 1;
1023 }
1024 return 0;
1025 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001026 if (PyBytes_Check(tag)) {
1027 char *p = PyBytes_AS_STRING(tag);
1028 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001029 if (p[i] == '{')
1030 check = 0;
1031 else if (p[i] == '}')
1032 check = 1;
1033 else if (check && PATHCHAR(p[i]))
1034 return 1;
1035 }
1036 return 0;
1037 }
1038
1039 return 1; /* unknown type; might be path expression */
1040}
1041
1042static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001043element_extend(ElementObject* self, PyObject* args)
1044{
1045 PyObject* seq;
1046 Py_ssize_t i, seqlen = 0;
1047
1048 PyObject* seq_in;
1049 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1050 return NULL;
1051
1052 seq = PySequence_Fast(seq_in, "");
1053 if (!seq) {
1054 PyErr_Format(
1055 PyExc_TypeError,
1056 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1057 );
1058 return NULL;
1059 }
1060
1061 seqlen = PySequence_Size(seq);
1062 for (i = 0; i < seqlen; i++) {
1063 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001064 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1065 Py_DECREF(seq);
1066 PyErr_Format(
1067 PyExc_TypeError,
1068 "expected an Element, not \"%.200s\"",
1069 Py_TYPE(element)->tp_name);
1070 return NULL;
1071 }
1072
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001073 if (element_add_subelement(self, element) < 0) {
1074 Py_DECREF(seq);
1075 return NULL;
1076 }
1077 }
1078
1079 Py_DECREF(seq);
1080
1081 Py_RETURN_NONE;
1082}
1083
1084static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001085element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086{
1087 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001089 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001090 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001091 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001092
Eli Bendersky737b1732012-05-29 06:02:56 +03001093 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1094 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001095 return NULL;
1096
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001097 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001098 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001099 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001100 st->elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001101 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001102 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001103
1104 if (!self->extra)
1105 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001106
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001107 for (i = 0; i < self->extra->length; i++) {
1108 PyObject* item = self->extra->children[i];
1109 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001110 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001111 Py_INCREF(item);
1112 return item;
1113 }
1114 }
1115
1116 Py_RETURN_NONE;
1117}
1118
1119static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001120element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001121{
1122 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001123 PyObject* tag;
1124 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001125 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001126 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001127 static char *kwlist[] = {"path", "default", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001128 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001129
Eli Bendersky737b1732012-05-29 06:02:56 +03001130 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1131 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001132 return NULL;
1133
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001134 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001135 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001136 st->elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001137 );
1138
1139 if (!self->extra) {
1140 Py_INCREF(default_value);
1141 return default_value;
1142 }
1143
1144 for (i = 0; i < self->extra->length; i++) {
1145 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +00001146 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
1147
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001148 PyObject* text = element_get_text(item);
1149 if (text == Py_None)
Eli Bendersky25771b32013-01-13 05:26:07 -08001150 return PyUnicode_New(0, 0);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001151 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001152 return text;
1153 }
1154 }
1155
1156 Py_INCREF(default_value);
1157 return default_value;
1158}
1159
1160static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001161element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001162{
1163 int i;
1164 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001165 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001166 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001167 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001168 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001169
Eli Bendersky737b1732012-05-29 06:02:56 +03001170 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1171 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001172 return NULL;
1173
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001174 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001175 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001176 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001177 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001179 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180
1181 out = PyList_New(0);
1182 if (!out)
1183 return NULL;
1184
1185 if (!self->extra)
1186 return out;
1187
1188 for (i = 0; i < self->extra->length; i++) {
1189 PyObject* item = self->extra->children[i];
1190 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001191 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001192 if (PyList_Append(out, item) < 0) {
1193 Py_DECREF(out);
1194 return NULL;
1195 }
1196 }
1197 }
1198
1199 return out;
1200}
1201
1202static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001203element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001204{
1205 PyObject* tag;
1206 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001207 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001208 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001209 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001210
Eli Bendersky737b1732012-05-29 06:02:56 +03001211 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1212 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001213 return NULL;
1214
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001215 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001216 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001217 );
1218}
1219
1220static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001221element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001222{
1223 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001224 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001225
1226 PyObject* key;
1227 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001228
1229 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1230 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001231 return NULL;
1232
1233 if (!self->extra || self->extra->attrib == Py_None)
1234 value = default_value;
1235 else {
1236 value = PyDict_GetItem(self->extra->attrib, key);
1237 if (!value)
1238 value = default_value;
1239 }
1240
1241 Py_INCREF(value);
1242 return value;
1243}
1244
1245static PyObject*
1246element_getchildren(ElementObject* self, PyObject* args)
1247{
1248 int i;
1249 PyObject* list;
1250
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001251 /* FIXME: report as deprecated? */
1252
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001253 if (!PyArg_ParseTuple(args, ":getchildren"))
1254 return NULL;
1255
1256 if (!self->extra)
1257 return PyList_New(0);
1258
1259 list = PyList_New(self->extra->length);
1260 if (!list)
1261 return NULL;
1262
1263 for (i = 0; i < self->extra->length; i++) {
1264 PyObject* item = self->extra->children[i];
1265 Py_INCREF(item);
1266 PyList_SET_ITEM(list, i, item);
1267 }
1268
1269 return list;
1270}
1271
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001272
Eli Bendersky64d11e62012-06-15 07:42:50 +03001273static PyObject *
1274create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1275
1276
1277static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001278element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001279{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001280 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001281 static char* kwlist[] = {"tag", 0};
1282
1283 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001284 return NULL;
1285
Eli Bendersky64d11e62012-06-15 07:42:50 +03001286 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001287}
1288
1289
1290static PyObject*
1291element_itertext(ElementObject* self, PyObject* args)
1292{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001293 if (!PyArg_ParseTuple(args, ":itertext"))
1294 return NULL;
1295
Eli Bendersky64d11e62012-06-15 07:42:50 +03001296 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001297}
1298
Eli Bendersky64d11e62012-06-15 07:42:50 +03001299
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001300static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001301element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001302{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001303 ElementObject* self = (ElementObject*) self_;
1304
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001305 if (!self->extra || index < 0 || index >= self->extra->length) {
1306 PyErr_SetString(
1307 PyExc_IndexError,
1308 "child index out of range"
1309 );
1310 return NULL;
1311 }
1312
1313 Py_INCREF(self->extra->children[index]);
1314 return self->extra->children[index];
1315}
1316
1317static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001318element_insert(ElementObject* self, PyObject* args)
1319{
1320 int i;
1321
1322 int index;
1323 PyObject* element;
1324 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1325 &Element_Type, &element))
1326 return NULL;
1327
Victor Stinner5f0af232013-07-11 23:01:36 +02001328 if (!self->extra) {
1329 if (create_extra(self, NULL) < 0)
1330 return NULL;
1331 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001332
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001333 if (index < 0) {
1334 index += self->extra->length;
1335 if (index < 0)
1336 index = 0;
1337 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001338 if (index > self->extra->length)
1339 index = self->extra->length;
1340
1341 if (element_resize(self, 1) < 0)
1342 return NULL;
1343
1344 for (i = self->extra->length; i > index; i--)
1345 self->extra->children[i] = self->extra->children[i-1];
1346
1347 Py_INCREF(element);
1348 self->extra->children[index] = element;
1349
1350 self->extra->length++;
1351
1352 Py_RETURN_NONE;
1353}
1354
1355static PyObject*
1356element_items(ElementObject* self, PyObject* args)
1357{
1358 if (!PyArg_ParseTuple(args, ":items"))
1359 return NULL;
1360
1361 if (!self->extra || self->extra->attrib == Py_None)
1362 return PyList_New(0);
1363
1364 return PyDict_Items(self->extra->attrib);
1365}
1366
1367static PyObject*
1368element_keys(ElementObject* self, PyObject* args)
1369{
1370 if (!PyArg_ParseTuple(args, ":keys"))
1371 return NULL;
1372
1373 if (!self->extra || self->extra->attrib == Py_None)
1374 return PyList_New(0);
1375
1376 return PyDict_Keys(self->extra->attrib);
1377}
1378
Martin v. Löwis18e16552006-02-15 17:27:45 +00001379static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001380element_length(ElementObject* self)
1381{
1382 if (!self->extra)
1383 return 0;
1384
1385 return self->extra->length;
1386}
1387
1388static PyObject*
1389element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1390{
1391 PyObject* elem;
1392
1393 PyObject* tag;
1394 PyObject* attrib;
1395 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1396 return NULL;
1397
1398 attrib = PyDict_Copy(attrib);
1399 if (!attrib)
1400 return NULL;
1401
Eli Bendersky092af1f2012-03-04 07:14:03 +02001402 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001403
1404 Py_DECREF(attrib);
1405
1406 return elem;
1407}
1408
1409static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001410element_remove(ElementObject* self, PyObject* args)
1411{
1412 int i;
1413
1414 PyObject* element;
1415 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1416 return NULL;
1417
1418 if (!self->extra) {
1419 /* element has no children, so raise exception */
1420 PyErr_SetString(
1421 PyExc_ValueError,
1422 "list.remove(x): x not in list"
1423 );
1424 return NULL;
1425 }
1426
1427 for (i = 0; i < self->extra->length; i++) {
1428 if (self->extra->children[i] == element)
1429 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001430 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001431 break;
1432 }
1433
1434 if (i == self->extra->length) {
1435 /* element is not in children, so raise exception */
1436 PyErr_SetString(
1437 PyExc_ValueError,
1438 "list.remove(x): x not in list"
1439 );
1440 return NULL;
1441 }
1442
1443 Py_DECREF(self->extra->children[i]);
1444
1445 self->extra->length--;
1446
1447 for (; i < self->extra->length; i++)
1448 self->extra->children[i] = self->extra->children[i+1];
1449
1450 Py_RETURN_NONE;
1451}
1452
1453static PyObject*
1454element_repr(ElementObject* self)
1455{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001456 if (self->tag)
1457 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1458 else
1459 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001460}
1461
1462static PyObject*
1463element_set(ElementObject* self, PyObject* args)
1464{
1465 PyObject* attrib;
1466
1467 PyObject* key;
1468 PyObject* value;
1469 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1470 return NULL;
1471
Victor Stinner5f0af232013-07-11 23:01:36 +02001472 if (!self->extra) {
1473 if (create_extra(self, NULL) < 0)
1474 return NULL;
1475 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001476
1477 attrib = element_get_attrib(self);
1478 if (!attrib)
1479 return NULL;
1480
1481 if (PyDict_SetItem(attrib, key, value) < 0)
1482 return NULL;
1483
1484 Py_RETURN_NONE;
1485}
1486
1487static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001488element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001489{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001490 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001491 int i;
1492 PyObject* old;
1493
1494 if (!self->extra || index < 0 || index >= self->extra->length) {
1495 PyErr_SetString(
1496 PyExc_IndexError,
1497 "child assignment index out of range");
1498 return -1;
1499 }
1500
1501 old = self->extra->children[index];
1502
1503 if (item) {
1504 Py_INCREF(item);
1505 self->extra->children[index] = item;
1506 } else {
1507 self->extra->length--;
1508 for (i = index; i < self->extra->length; i++)
1509 self->extra->children[i] = self->extra->children[i+1];
1510 }
1511
1512 Py_DECREF(old);
1513
1514 return 0;
1515}
1516
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001517static PyObject*
1518element_subscr(PyObject* self_, PyObject* item)
1519{
1520 ElementObject* self = (ElementObject*) self_;
1521
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001522 if (PyIndex_Check(item)) {
1523 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001524
1525 if (i == -1 && PyErr_Occurred()) {
1526 return NULL;
1527 }
1528 if (i < 0 && self->extra)
1529 i += self->extra->length;
1530 return element_getitem(self_, i);
1531 }
1532 else if (PySlice_Check(item)) {
1533 Py_ssize_t start, stop, step, slicelen, cur, i;
1534 PyObject* list;
1535
1536 if (!self->extra)
1537 return PyList_New(0);
1538
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001539 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001540 self->extra->length,
1541 &start, &stop, &step, &slicelen) < 0) {
1542 return NULL;
1543 }
1544
1545 if (slicelen <= 0)
1546 return PyList_New(0);
1547 else {
1548 list = PyList_New(slicelen);
1549 if (!list)
1550 return NULL;
1551
1552 for (cur = start, i = 0; i < slicelen;
1553 cur += step, i++) {
1554 PyObject* item = self->extra->children[cur];
1555 Py_INCREF(item);
1556 PyList_SET_ITEM(list, i, item);
1557 }
1558
1559 return list;
1560 }
1561 }
1562 else {
1563 PyErr_SetString(PyExc_TypeError,
1564 "element indices must be integers");
1565 return NULL;
1566 }
1567}
1568
1569static int
1570element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1571{
1572 ElementObject* self = (ElementObject*) self_;
1573
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001574 if (PyIndex_Check(item)) {
1575 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001576
1577 if (i == -1 && PyErr_Occurred()) {
1578 return -1;
1579 }
1580 if (i < 0 && self->extra)
1581 i += self->extra->length;
1582 return element_setitem(self_, i, value);
1583 }
1584 else if (PySlice_Check(item)) {
1585 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1586
1587 PyObject* recycle = NULL;
1588 PyObject* seq = NULL;
1589
Victor Stinner5f0af232013-07-11 23:01:36 +02001590 if (!self->extra) {
1591 if (create_extra(self, NULL) < 0)
1592 return -1;
1593 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001594
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001595 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001596 self->extra->length,
1597 &start, &stop, &step, &slicelen) < 0) {
1598 return -1;
1599 }
1600
Eli Bendersky865756a2012-03-09 13:38:15 +02001601 if (value == NULL) {
1602 /* Delete slice */
1603 size_t cur;
1604 Py_ssize_t i;
1605
1606 if (slicelen <= 0)
1607 return 0;
1608
1609 /* Since we're deleting, the direction of the range doesn't matter,
1610 * so for simplicity make it always ascending.
1611 */
1612 if (step < 0) {
1613 stop = start + 1;
1614 start = stop + step * (slicelen - 1) - 1;
1615 step = -step;
1616 }
1617
1618 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1619
1620 /* recycle is a list that will contain all the children
1621 * scheduled for removal.
1622 */
1623 if (!(recycle = PyList_New(slicelen))) {
1624 PyErr_NoMemory();
1625 return -1;
1626 }
1627
1628 /* This loop walks over all the children that have to be deleted,
1629 * with cur pointing at them. num_moved is the amount of children
1630 * until the next deleted child that have to be "shifted down" to
1631 * occupy the deleted's places.
1632 * Note that in the ith iteration, shifting is done i+i places down
1633 * because i children were already removed.
1634 */
1635 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1636 /* Compute how many children have to be moved, clipping at the
1637 * list end.
1638 */
1639 Py_ssize_t num_moved = step - 1;
1640 if (cur + step >= (size_t)self->extra->length) {
1641 num_moved = self->extra->length - cur - 1;
1642 }
1643
1644 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1645
1646 memmove(
1647 self->extra->children + cur - i,
1648 self->extra->children + cur + 1,
1649 num_moved * sizeof(PyObject *));
1650 }
1651
1652 /* Leftover "tail" after the last removed child */
1653 cur = start + (size_t)slicelen * step;
1654 if (cur < (size_t)self->extra->length) {
1655 memmove(
1656 self->extra->children + cur - slicelen,
1657 self->extra->children + cur,
1658 (self->extra->length - cur) * sizeof(PyObject *));
1659 }
1660
1661 self->extra->length -= slicelen;
1662
1663 /* Discard the recycle list with all the deleted sub-elements */
1664 Py_XDECREF(recycle);
1665 return 0;
1666 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001667 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001668 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001669 seq = PySequence_Fast(value, "");
1670 if (!seq) {
1671 PyErr_Format(
1672 PyExc_TypeError,
1673 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1674 );
1675 return -1;
1676 }
1677 newlen = PySequence_Size(seq);
1678 }
1679
1680 if (step != 1 && newlen != slicelen)
1681 {
1682 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001683 "attempt to assign sequence of size %zd "
1684 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001685 newlen, slicelen
1686 );
1687 return -1;
1688 }
1689
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001690 /* Resize before creating the recycle bin, to prevent refleaks. */
1691 if (newlen > slicelen) {
1692 if (element_resize(self, newlen - slicelen) < 0) {
1693 if (seq) {
1694 Py_DECREF(seq);
1695 }
1696 return -1;
1697 }
1698 }
1699
1700 if (slicelen > 0) {
1701 /* to avoid recursive calls to this method (via decref), move
1702 old items to the recycle bin here, and get rid of them when
1703 we're done modifying the element */
1704 recycle = PyList_New(slicelen);
1705 if (!recycle) {
1706 if (seq) {
1707 Py_DECREF(seq);
1708 }
1709 return -1;
1710 }
1711 for (cur = start, i = 0; i < slicelen;
1712 cur += step, i++)
1713 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1714 }
1715
1716 if (newlen < slicelen) {
1717 /* delete slice */
1718 for (i = stop; i < self->extra->length; i++)
1719 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1720 } else if (newlen > slicelen) {
1721 /* insert slice */
1722 for (i = self->extra->length-1; i >= stop; i--)
1723 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1724 }
1725
1726 /* replace the slice */
1727 for (cur = start, i = 0; i < newlen;
1728 cur += step, i++) {
1729 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1730 Py_INCREF(element);
1731 self->extra->children[cur] = element;
1732 }
1733
1734 self->extra->length += newlen - slicelen;
1735
1736 if (seq) {
1737 Py_DECREF(seq);
1738 }
1739
1740 /* discard the recycle bin, and everything in it */
1741 Py_XDECREF(recycle);
1742
1743 return 0;
1744 }
1745 else {
1746 PyErr_SetString(PyExc_TypeError,
1747 "element indices must be integers");
1748 return -1;
1749 }
1750}
1751
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001752static PyMethodDef element_methods[] = {
1753
Eli Bendersky0192ba32012-03-30 16:38:33 +03001754 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001755
Eli Benderskya8736902013-01-05 06:26:39 -08001756 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001757 {"set", (PyCFunction) element_set, METH_VARARGS},
1758
Eli Bendersky737b1732012-05-29 06:02:56 +03001759 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1760 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1761 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001762
1763 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001764 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001765 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1766 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1767
Eli Benderskya8736902013-01-05 06:26:39 -08001768 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001769 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001770 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001771
Eli Benderskya8736902013-01-05 06:26:39 -08001772 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001773 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1774
1775 {"items", (PyCFunction) element_items, METH_VARARGS},
1776 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1777
1778 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1779
1780 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1781 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001782 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001783 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1784 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001785
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001786 {NULL, NULL}
1787};
1788
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001789static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001790element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001791{
1792 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001793 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001794
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001795 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001796 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001797
Alexander Belopolskye239d232010-12-08 23:31:48 +00001798 if (name == NULL)
1799 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001800
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001801 /* handle common attributes first */
1802 if (strcmp(name, "tag") == 0) {
1803 res = self->tag;
1804 Py_INCREF(res);
1805 return res;
1806 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001807 res = element_get_text(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02001808 Py_XINCREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001809 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001810 }
1811
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001812 /* methods */
1813 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1814 if (res)
1815 return res;
1816
1817 /* less common attributes */
1818 if (strcmp(name, "tail") == 0) {
1819 PyErr_Clear();
1820 res = element_get_tail(self);
1821 } else if (strcmp(name, "attrib") == 0) {
1822 PyErr_Clear();
Victor Stinner5f0af232013-07-11 23:01:36 +02001823 if (!self->extra) {
1824 if (create_extra(self, NULL) < 0)
1825 return NULL;
1826 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001827 res = element_get_attrib(self);
1828 }
1829
1830 if (!res)
1831 return NULL;
1832
1833 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001834 return res;
1835}
1836
Eli Benderskyef9683b2013-05-18 07:52:34 -07001837static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001838element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001839{
Eli Benderskyb20df952012-05-20 06:33:29 +03001840 char *name = "";
1841 if (PyUnicode_Check(nameobj))
1842 name = _PyUnicode_AsString(nameobj);
Victor Stinner4d463432013-07-11 23:05:03 +02001843 if (name == NULL)
Eli Benderskyef9683b2013-05-18 07:52:34 -07001844 return -1;
Victor Stinner4d463432013-07-11 23:05:03 +02001845
1846 if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001847 Py_DECREF(self->tag);
1848 self->tag = value;
1849 Py_INCREF(self->tag);
1850 } else if (strcmp(name, "text") == 0) {
1851 Py_DECREF(JOIN_OBJ(self->text));
1852 self->text = value;
1853 Py_INCREF(self->text);
1854 } else if (strcmp(name, "tail") == 0) {
1855 Py_DECREF(JOIN_OBJ(self->tail));
1856 self->tail = value;
1857 Py_INCREF(self->tail);
1858 } else if (strcmp(name, "attrib") == 0) {
Victor Stinner5f0af232013-07-11 23:01:36 +02001859 if (!self->extra) {
1860 if (create_extra(self, NULL) < 0)
1861 return -1;
1862 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001863 Py_DECREF(self->extra->attrib);
1864 self->extra->attrib = value;
1865 Py_INCREF(self->extra->attrib);
1866 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001867 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001868 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001869 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001870 }
1871
Eli Benderskyef9683b2013-05-18 07:52:34 -07001872 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001873}
1874
1875static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001876 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001877 0, /* sq_concat */
1878 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001879 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001880 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001881 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001882 0,
1883};
1884
1885static PyMappingMethods element_as_mapping = {
1886 (lenfunc) element_length,
1887 (binaryfunc) element_subscr,
1888 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001889};
1890
Neal Norwitz227b5332006-03-22 09:28:35 +00001891static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001892 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001893 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001894 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001895 (destructor)element_dealloc, /* tp_dealloc */
1896 0, /* tp_print */
1897 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001898 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001899 0, /* tp_reserved */
1900 (reprfunc)element_repr, /* tp_repr */
1901 0, /* tp_as_number */
1902 &element_as_sequence, /* tp_as_sequence */
1903 &element_as_mapping, /* tp_as_mapping */
1904 0, /* tp_hash */
1905 0, /* tp_call */
1906 0, /* tp_str */
1907 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001908 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001909 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001910 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1911 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001912 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001913 (traverseproc)element_gc_traverse, /* tp_traverse */
1914 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001915 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001916 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001917 0, /* tp_iter */
1918 0, /* tp_iternext */
1919 element_methods, /* tp_methods */
1920 0, /* tp_members */
1921 0, /* tp_getset */
1922 0, /* tp_base */
1923 0, /* tp_dict */
1924 0, /* tp_descr_get */
1925 0, /* tp_descr_set */
1926 0, /* tp_dictoffset */
1927 (initproc)element_init, /* tp_init */
1928 PyType_GenericAlloc, /* tp_alloc */
1929 element_new, /* tp_new */
1930 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001931};
1932
Eli Bendersky64d11e62012-06-15 07:42:50 +03001933/******************************* Element iterator ****************************/
1934
1935/* ElementIterObject represents the iteration state over an XML element in
1936 * pre-order traversal. To keep track of which sub-element should be returned
1937 * next, a stack of parents is maintained. This is a standard stack-based
1938 * iterative pre-order traversal of a tree.
1939 * The stack is managed using a single-linked list starting at parent_stack.
1940 * Each stack node contains the saved parent to which we should return after
1941 * the current one is exhausted, and the next child to examine in that parent.
1942 */
1943typedef struct ParentLocator_t {
1944 ElementObject *parent;
1945 Py_ssize_t child_index;
1946 struct ParentLocator_t *next;
1947} ParentLocator;
1948
1949typedef struct {
1950 PyObject_HEAD
1951 ParentLocator *parent_stack;
1952 ElementObject *root_element;
1953 PyObject *sought_tag;
1954 int root_done;
1955 int gettext;
1956} ElementIterObject;
1957
1958
1959static void
1960elementiter_dealloc(ElementIterObject *it)
1961{
1962 ParentLocator *p = it->parent_stack;
1963 while (p) {
1964 ParentLocator *temp = p;
1965 Py_XDECREF(p->parent);
1966 p = p->next;
1967 PyObject_Free(temp);
1968 }
1969
1970 Py_XDECREF(it->sought_tag);
1971 Py_XDECREF(it->root_element);
1972
1973 PyObject_GC_UnTrack(it);
1974 PyObject_GC_Del(it);
1975}
1976
1977static int
1978elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1979{
1980 ParentLocator *p = it->parent_stack;
1981 while (p) {
1982 Py_VISIT(p->parent);
1983 p = p->next;
1984 }
1985
1986 Py_VISIT(it->root_element);
1987 Py_VISIT(it->sought_tag);
1988 return 0;
1989}
1990
1991/* Helper function for elementiter_next. Add a new parent to the parent stack.
1992 */
1993static ParentLocator *
1994parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1995{
1996 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1997 if (new_node) {
1998 new_node->parent = parent;
1999 Py_INCREF(parent);
2000 new_node->child_index = 0;
2001 new_node->next = stack;
2002 }
2003 return new_node;
2004}
2005
2006static PyObject *
2007elementiter_next(ElementIterObject *it)
2008{
2009 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002010 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002011 * A short note on gettext: this function serves both the iter() and
2012 * itertext() methods to avoid code duplication. However, there are a few
2013 * small differences in the way these iterations work. Namely:
2014 * - itertext() only yields text from nodes that have it, and continues
2015 * iterating when a node doesn't have text (so it doesn't return any
2016 * node like iter())
2017 * - itertext() also has to handle tail, after finishing with all the
2018 * children of a node.
2019 */
Eli Bendersky113da642012-06-15 07:52:49 +03002020 ElementObject *cur_parent;
2021 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002022
2023 while (1) {
2024 /* Handle the case reached in the beginning and end of iteration, where
2025 * the parent stack is empty. The root_done flag gives us indication
2026 * whether we've just started iterating (so root_done is 0), in which
2027 * case the root is returned. If root_done is 1 and we're here, the
2028 * iterator is exhausted.
2029 */
2030 if (!it->parent_stack->parent) {
2031 if (it->root_done) {
2032 PyErr_SetNone(PyExc_StopIteration);
2033 return NULL;
2034 } else {
2035 it->parent_stack = parent_stack_push_new(it->parent_stack,
2036 it->root_element);
2037 if (!it->parent_stack) {
2038 PyErr_NoMemory();
2039 return NULL;
2040 }
2041
2042 it->root_done = 1;
2043 if (it->sought_tag == Py_None ||
2044 PyObject_RichCompareBool(it->root_element->tag,
2045 it->sought_tag, Py_EQ) == 1) {
2046 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002047 PyObject *text = element_get_text(it->root_element);
2048 if (!text)
2049 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002050 if (PyObject_IsTrue(text)) {
2051 Py_INCREF(text);
2052 return text;
2053 }
2054 } else {
2055 Py_INCREF(it->root_element);
2056 return (PyObject *)it->root_element;
2057 }
2058 }
2059 }
2060 }
2061
2062 /* See if there are children left to traverse in the current parent. If
2063 * yes, visit the next child. If not, pop the stack and try again.
2064 */
Eli Bendersky113da642012-06-15 07:52:49 +03002065 cur_parent = it->parent_stack->parent;
2066 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002067 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2068 ElementObject *child = (ElementObject *)
2069 cur_parent->extra->children[child_index];
2070 it->parent_stack->child_index++;
2071 it->parent_stack = parent_stack_push_new(it->parent_stack,
2072 child);
2073 if (!it->parent_stack) {
2074 PyErr_NoMemory();
2075 return NULL;
2076 }
2077
2078 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002079 PyObject *text = element_get_text(child);
2080 if (!text)
2081 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002082 if (PyObject_IsTrue(text)) {
2083 Py_INCREF(text);
2084 return text;
2085 }
2086 } else if (it->sought_tag == Py_None ||
2087 PyObject_RichCompareBool(child->tag,
2088 it->sought_tag, Py_EQ) == 1) {
2089 Py_INCREF(child);
2090 return (PyObject *)child;
2091 }
2092 else
2093 continue;
2094 }
2095 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002096 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002097 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002098 if (it->gettext) {
2099 tail = element_get_tail(cur_parent);
2100 if (!tail)
2101 return NULL;
2102 }
2103 else
2104 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002105 Py_XDECREF(it->parent_stack->parent);
2106 PyObject_Free(it->parent_stack);
2107 it->parent_stack = next;
2108
2109 /* Note that extra condition on it->parent_stack->parent here;
2110 * this is because itertext() is supposed to only return *inner*
2111 * text, not text following the element it began iteration with.
2112 */
2113 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2114 Py_INCREF(tail);
2115 return tail;
2116 }
2117 }
2118 }
2119
2120 return NULL;
2121}
2122
2123
2124static PyTypeObject ElementIter_Type = {
2125 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002126 /* Using the module's name since the pure-Python implementation does not
2127 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002128 "_elementtree._element_iterator", /* tp_name */
2129 sizeof(ElementIterObject), /* tp_basicsize */
2130 0, /* tp_itemsize */
2131 /* methods */
2132 (destructor)elementiter_dealloc, /* tp_dealloc */
2133 0, /* tp_print */
2134 0, /* tp_getattr */
2135 0, /* tp_setattr */
2136 0, /* tp_reserved */
2137 0, /* tp_repr */
2138 0, /* tp_as_number */
2139 0, /* tp_as_sequence */
2140 0, /* tp_as_mapping */
2141 0, /* tp_hash */
2142 0, /* tp_call */
2143 0, /* tp_str */
2144 0, /* tp_getattro */
2145 0, /* tp_setattro */
2146 0, /* tp_as_buffer */
2147 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2148 0, /* tp_doc */
2149 (traverseproc)elementiter_traverse, /* tp_traverse */
2150 0, /* tp_clear */
2151 0, /* tp_richcompare */
2152 0, /* tp_weaklistoffset */
2153 PyObject_SelfIter, /* tp_iter */
2154 (iternextfunc)elementiter_next, /* tp_iternext */
2155 0, /* tp_methods */
2156 0, /* tp_members */
2157 0, /* tp_getset */
2158 0, /* tp_base */
2159 0, /* tp_dict */
2160 0, /* tp_descr_get */
2161 0, /* tp_descr_set */
2162 0, /* tp_dictoffset */
2163 0, /* tp_init */
2164 0, /* tp_alloc */
2165 0, /* tp_new */
2166};
2167
2168
2169static PyObject *
2170create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2171{
2172 ElementIterObject *it;
2173 PyObject *star = NULL;
2174
2175 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2176 if (!it)
2177 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002178
2179 if (PyUnicode_Check(tag))
2180 star = PyUnicode_FromString("*");
2181 else if (PyBytes_Check(tag))
2182 star = PyBytes_FromString("*");
2183
2184 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2185 tag = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002186 Py_XDECREF(star);
Victor Stinner4d463432013-07-11 23:05:03 +02002187
2188 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002189 it->sought_tag = tag;
2190 it->root_done = 0;
2191 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002192 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002193 it->root_element = self;
2194
Eli Bendersky64d11e62012-06-15 07:42:50 +03002195 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002196
2197 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2198 if (it->parent_stack == NULL) {
2199 Py_DECREF(it);
2200 PyErr_NoMemory();
2201 return NULL;
2202 }
2203 it->parent_stack->parent = NULL;
2204 it->parent_stack->child_index = 0;
2205 it->parent_stack->next = NULL;
2206
Eli Bendersky64d11e62012-06-15 07:42:50 +03002207 return (PyObject *)it;
2208}
2209
2210
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002211/* ==================================================================== */
2212/* the tree builder type */
2213
2214typedef struct {
2215 PyObject_HEAD
2216
Eli Bendersky58d548d2012-05-29 15:45:16 +03002217 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002218
Antoine Pitrouee329312012-10-04 19:53:29 +02002219 PyObject *this; /* current node */
2220 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002221
Eli Bendersky58d548d2012-05-29 15:45:16 +03002222 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002223
Eli Bendersky58d548d2012-05-29 15:45:16 +03002224 PyObject *stack; /* element stack */
2225 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002226
Eli Bendersky48d358b2012-05-30 17:57:50 +03002227 PyObject *element_factory;
2228
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002229 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002230 PyObject *events; /* list of events, or NULL if not collecting */
2231 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2232 PyObject *end_event_obj;
2233 PyObject *start_ns_event_obj;
2234 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002235} TreeBuilderObject;
2236
Christian Heimes90aa7642007-12-19 02:45:37 +00002237#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002238
2239/* -------------------------------------------------------------------- */
2240/* constructor and destructor */
2241
Eli Bendersky58d548d2012-05-29 15:45:16 +03002242static PyObject *
2243treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002244{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002245 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2246 if (t != NULL) {
2247 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002248
Eli Bendersky58d548d2012-05-29 15:45:16 +03002249 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002250 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002251 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002252 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002253
Eli Bendersky58d548d2012-05-29 15:45:16 +03002254 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002255 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002256 t->stack = PyList_New(20);
2257 if (!t->stack) {
2258 Py_DECREF(t->this);
2259 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002260 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002261 return NULL;
2262 }
2263 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002264
Eli Bendersky58d548d2012-05-29 15:45:16 +03002265 t->events = NULL;
2266 t->start_event_obj = t->end_event_obj = NULL;
2267 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2268 }
2269 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002270}
2271
Eli Bendersky58d548d2012-05-29 15:45:16 +03002272static int
2273treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002274{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002275 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002276 PyObject *element_factory = NULL;
2277 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002278 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002279
2280 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2281 &element_factory)) {
2282 return -1;
2283 }
2284
2285 if (element_factory) {
2286 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002287 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002288 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002289 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002290 }
2291
Eli Bendersky58d548d2012-05-29 15:45:16 +03002292 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002293}
2294
Eli Bendersky48d358b2012-05-30 17:57:50 +03002295static int
2296treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2297{
2298 Py_VISIT(self->root);
2299 Py_VISIT(self->this);
2300 Py_VISIT(self->last);
2301 Py_VISIT(self->data);
2302 Py_VISIT(self->stack);
2303 Py_VISIT(self->element_factory);
2304 return 0;
2305}
2306
2307static int
2308treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002309{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002310 Py_CLEAR(self->end_ns_event_obj);
2311 Py_CLEAR(self->start_ns_event_obj);
2312 Py_CLEAR(self->end_event_obj);
2313 Py_CLEAR(self->start_event_obj);
2314 Py_CLEAR(self->events);
2315 Py_CLEAR(self->stack);
2316 Py_CLEAR(self->data);
2317 Py_CLEAR(self->last);
2318 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002319 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002320 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002321 return 0;
2322}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002323
Eli Bendersky48d358b2012-05-30 17:57:50 +03002324static void
2325treebuilder_dealloc(TreeBuilderObject *self)
2326{
2327 PyObject_GC_UnTrack(self);
2328 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002329 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002330}
2331
2332/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002333/* helpers for handling of arbitrary element-like objects */
2334
2335static int
2336treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2337 PyObject **dest, _Py_Identifier *name)
2338{
2339 if (Element_CheckExact(element)) {
2340 Py_DECREF(JOIN_OBJ(*dest));
2341 *dest = JOIN_SET(data, PyList_CheckExact(data));
2342 return 0;
2343 }
2344 else {
2345 PyObject *joined = list_join(data);
2346 int r;
2347 if (joined == NULL)
2348 return -1;
2349 r = _PyObject_SetAttrId(element, name, joined);
2350 Py_DECREF(joined);
2351 return r;
2352 }
2353}
2354
2355/* These two functions steal a reference to data */
2356static int
2357treebuilder_set_element_text(PyObject *element, PyObject *data)
2358{
2359 _Py_IDENTIFIER(text);
2360 return treebuilder_set_element_text_or_tail(
2361 element, data, &((ElementObject *) element)->text, &PyId_text);
2362}
2363
2364static int
2365treebuilder_set_element_tail(PyObject *element, PyObject *data)
2366{
2367 _Py_IDENTIFIER(tail);
2368 return treebuilder_set_element_text_or_tail(
2369 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2370}
2371
2372static int
2373treebuilder_add_subelement(PyObject *element, PyObject *child)
2374{
2375 _Py_IDENTIFIER(append);
2376 if (Element_CheckExact(element)) {
2377 ElementObject *elem = (ElementObject *) element;
2378 return element_add_subelement(elem, child);
2379 }
2380 else {
2381 PyObject *res;
2382 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2383 if (res == NULL)
2384 return -1;
2385 Py_DECREF(res);
2386 return 0;
2387 }
2388}
2389
2390/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002391/* handlers */
2392
2393LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002394treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2395 PyObject* attrib)
2396{
2397 PyObject* node;
2398 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002399 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002400
2401 if (self->data) {
2402 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002403 if (treebuilder_set_element_text(self->last, self->data))
2404 return NULL;
2405 }
2406 else {
2407 if (treebuilder_set_element_tail(self->last, self->data))
2408 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002409 }
2410 self->data = NULL;
2411 }
2412
Eli Bendersky08231a92013-05-18 15:47:16 -07002413 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002414 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2415 } else {
2416 node = create_new_element(tag, attrib);
2417 }
2418 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002419 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002420 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002421
Antoine Pitrouee329312012-10-04 19:53:29 +02002422 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002423
2424 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002425 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002426 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002427 } else {
2428 if (self->root) {
2429 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002430 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002431 "multiple elements on top level"
2432 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002433 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002434 }
2435 Py_INCREF(node);
2436 self->root = node;
2437 }
2438
2439 if (self->index < PyList_GET_SIZE(self->stack)) {
2440 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002441 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002442 Py_INCREF(this);
2443 } else {
2444 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002445 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002446 }
2447 self->index++;
2448
2449 Py_DECREF(this);
2450 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002451 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002452
2453 Py_DECREF(self->last);
2454 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002455 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002456
2457 if (self->start_event_obj) {
2458 PyObject* res;
2459 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002460 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002461 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002462 PyList_Append(self->events, res);
2463 Py_DECREF(res);
2464 } else
2465 PyErr_Clear(); /* FIXME: propagate error */
2466 }
2467
2468 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002469
2470 error:
2471 Py_DECREF(node);
2472 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002473}
2474
2475LOCAL(PyObject*)
2476treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2477{
2478 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002479 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002480 /* ignore calls to data before the first call to start */
2481 Py_RETURN_NONE;
2482 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002483 /* store the first item as is */
2484 Py_INCREF(data); self->data = data;
2485 } else {
2486 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002487 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2488 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002489 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002490 /* expat often generates single character data sections; handle
2491 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002492 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2493 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002494 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002495 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002496 } else if (PyList_CheckExact(self->data)) {
2497 if (PyList_Append(self->data, data) < 0)
2498 return NULL;
2499 } else {
2500 PyObject* list = PyList_New(2);
2501 if (!list)
2502 return NULL;
2503 PyList_SET_ITEM(list, 0, self->data);
2504 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2505 self->data = list;
2506 }
2507 }
2508
2509 Py_RETURN_NONE;
2510}
2511
2512LOCAL(PyObject*)
2513treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2514{
2515 PyObject* item;
2516
2517 if (self->data) {
2518 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002519 if (treebuilder_set_element_text(self->last, self->data))
2520 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002522 if (treebuilder_set_element_tail(self->last, self->data))
2523 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002524 }
2525 self->data = NULL;
2526 }
2527
2528 if (self->index == 0) {
2529 PyErr_SetString(
2530 PyExc_IndexError,
2531 "pop from empty stack"
2532 );
2533 return NULL;
2534 }
2535
2536 self->index--;
2537
2538 item = PyList_GET_ITEM(self->stack, self->index);
2539 Py_INCREF(item);
2540
2541 Py_DECREF(self->last);
2542
Antoine Pitrouee329312012-10-04 19:53:29 +02002543 self->last = self->this;
2544 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002545
2546 if (self->end_event_obj) {
2547 PyObject* res;
2548 PyObject* action = self->end_event_obj;
2549 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002550 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002551 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002552 PyList_Append(self->events, res);
2553 Py_DECREF(res);
2554 } else
2555 PyErr_Clear(); /* FIXME: propagate error */
2556 }
2557
2558 Py_INCREF(self->last);
2559 return (PyObject*) self->last;
2560}
2561
2562LOCAL(void)
2563treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002564 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002565{
2566 PyObject* res;
2567 PyObject* action;
2568 PyObject* parcel;
2569
2570 if (!self->events)
2571 return;
2572
2573 if (start) {
2574 if (!self->start_ns_event_obj)
2575 return;
2576 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002577 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002578 if (!parcel)
2579 return;
2580 Py_INCREF(action);
2581 } else {
2582 if (!self->end_ns_event_obj)
2583 return;
2584 action = self->end_ns_event_obj;
2585 Py_INCREF(action);
2586 parcel = Py_None;
2587 Py_INCREF(parcel);
2588 }
2589
2590 res = PyTuple_New(2);
2591
2592 if (res) {
2593 PyTuple_SET_ITEM(res, 0, action);
2594 PyTuple_SET_ITEM(res, 1, parcel);
2595 PyList_Append(self->events, res);
2596 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002597 }
2598 else {
2599 Py_DECREF(action);
2600 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002601 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002602 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002603}
2604
2605/* -------------------------------------------------------------------- */
2606/* methods (in alphabetical order) */
2607
2608static PyObject*
2609treebuilder_data(TreeBuilderObject* self, PyObject* args)
2610{
2611 PyObject* data;
2612 if (!PyArg_ParseTuple(args, "O:data", &data))
2613 return NULL;
2614
2615 return treebuilder_handle_data(self, data);
2616}
2617
2618static PyObject*
2619treebuilder_end(TreeBuilderObject* self, PyObject* args)
2620{
2621 PyObject* tag;
2622 if (!PyArg_ParseTuple(args, "O:end", &tag))
2623 return NULL;
2624
2625 return treebuilder_handle_end(self, tag);
2626}
2627
2628LOCAL(PyObject*)
2629treebuilder_done(TreeBuilderObject* self)
2630{
2631 PyObject* res;
2632
2633 /* FIXME: check stack size? */
2634
2635 if (self->root)
2636 res = self->root;
2637 else
2638 res = Py_None;
2639
2640 Py_INCREF(res);
2641 return res;
2642}
2643
2644static PyObject*
2645treebuilder_close(TreeBuilderObject* self, PyObject* args)
2646{
2647 if (!PyArg_ParseTuple(args, ":close"))
2648 return NULL;
2649
2650 return treebuilder_done(self);
2651}
2652
2653static PyObject*
2654treebuilder_start(TreeBuilderObject* self, PyObject* args)
2655{
2656 PyObject* tag;
2657 PyObject* attrib = Py_None;
2658 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2659 return NULL;
2660
2661 return treebuilder_handle_start(self, tag, attrib);
2662}
2663
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002664static PyMethodDef treebuilder_methods[] = {
2665 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2666 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2667 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002668 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2669 {NULL, NULL}
2670};
2671
Neal Norwitz227b5332006-03-22 09:28:35 +00002672static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002673 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002674 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002675 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002676 (destructor)treebuilder_dealloc, /* tp_dealloc */
2677 0, /* tp_print */
2678 0, /* tp_getattr */
2679 0, /* tp_setattr */
2680 0, /* tp_reserved */
2681 0, /* tp_repr */
2682 0, /* tp_as_number */
2683 0, /* tp_as_sequence */
2684 0, /* tp_as_mapping */
2685 0, /* tp_hash */
2686 0, /* tp_call */
2687 0, /* tp_str */
2688 0, /* tp_getattro */
2689 0, /* tp_setattro */
2690 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002691 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2692 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002693 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002694 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2695 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002696 0, /* tp_richcompare */
2697 0, /* tp_weaklistoffset */
2698 0, /* tp_iter */
2699 0, /* tp_iternext */
2700 treebuilder_methods, /* tp_methods */
2701 0, /* tp_members */
2702 0, /* tp_getset */
2703 0, /* tp_base */
2704 0, /* tp_dict */
2705 0, /* tp_descr_get */
2706 0, /* tp_descr_set */
2707 0, /* tp_dictoffset */
2708 (initproc)treebuilder_init, /* tp_init */
2709 PyType_GenericAlloc, /* tp_alloc */
2710 treebuilder_new, /* tp_new */
2711 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002712};
2713
2714/* ==================================================================== */
2715/* the expat interface */
2716
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002717#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002719
2720/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2721 * cached globally without being in per-module state.
2722 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002723static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002724#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002725
Eli Bendersky52467b12012-06-01 07:13:08 +03002726static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2727 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2728
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002729typedef struct {
2730 PyObject_HEAD
2731
2732 XML_Parser parser;
2733
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002734 PyObject *target;
2735 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002737 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002739 PyObject *handle_start;
2740 PyObject *handle_data;
2741 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002743 PyObject *handle_comment;
2744 PyObject *handle_pi;
2745 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002747 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002748
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002749} XMLParserObject;
2750
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002751#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753/* helpers */
2754
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002755LOCAL(PyObject*)
2756makeuniversal(XMLParserObject* self, const char* string)
2757{
2758 /* convert a UTF-8 tag/attribute name from the expat parser
2759 to a universal name string */
2760
Antoine Pitrouc1948842012-10-01 23:40:37 +02002761 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002762 PyObject* key;
2763 PyObject* value;
2764
2765 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002766 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002767 if (!key)
2768 return NULL;
2769
2770 value = PyDict_GetItem(self->names, key);
2771
2772 if (value) {
2773 Py_INCREF(value);
2774 } else {
2775 /* new name. convert to universal name, and decode as
2776 necessary */
2777
2778 PyObject* tag;
2779 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002780 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002781
2782 /* look for namespace separator */
2783 for (i = 0; i < size; i++)
2784 if (string[i] == '}')
2785 break;
2786 if (i != size) {
2787 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002788 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002789 if (tag == NULL) {
2790 Py_DECREF(key);
2791 return NULL;
2792 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002793 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002794 p[0] = '{';
2795 memcpy(p+1, string, size);
2796 size++;
2797 } else {
2798 /* plain name; use key as tag */
2799 Py_INCREF(key);
2800 tag = key;
2801 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002802
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002803 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002804 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002805 value = PyUnicode_DecodeUTF8(p, size, "strict");
2806 Py_DECREF(tag);
2807 if (!value) {
2808 Py_DECREF(key);
2809 return NULL;
2810 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002811
2812 /* add to names dictionary */
2813 if (PyDict_SetItem(self->names, key, value) < 0) {
2814 Py_DECREF(key);
2815 Py_DECREF(value);
2816 return NULL;
2817 }
2818 }
2819
2820 Py_DECREF(key);
2821 return value;
2822}
2823
Eli Bendersky5b77d812012-03-16 08:20:05 +02002824/* Set the ParseError exception with the given parameters.
2825 * If message is not NULL, it's used as the error string. Otherwise, the
2826 * message string is the default for the given error_code.
2827*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002828static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002829expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002830{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002831 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002832 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002833
Victor Stinner499dfcf2011-03-21 13:26:24 +01002834 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002835 message ? message : EXPAT(ErrorString)(error_code),
2836 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002837 if (errmsg == NULL)
2838 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002839
Eli Bendersky532d03e2013-08-10 08:00:39 -07002840 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002841 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002842 if (!error)
2843 return;
2844
Eli Bendersky5b77d812012-03-16 08:20:05 +02002845 /* Add code and position attributes */
2846 code = PyLong_FromLong((long)error_code);
2847 if (!code) {
2848 Py_DECREF(error);
2849 return;
2850 }
2851 if (PyObject_SetAttrString(error, "code", code) == -1) {
2852 Py_DECREF(error);
2853 Py_DECREF(code);
2854 return;
2855 }
2856 Py_DECREF(code);
2857
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002858 position = Py_BuildValue("(ii)", line, column);
2859 if (!position) {
2860 Py_DECREF(error);
2861 return;
2862 }
2863 if (PyObject_SetAttrString(error, "position", position) == -1) {
2864 Py_DECREF(error);
2865 Py_DECREF(position);
2866 return;
2867 }
2868 Py_DECREF(position);
2869
Eli Bendersky532d03e2013-08-10 08:00:39 -07002870 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002871 Py_DECREF(error);
2872}
2873
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002874/* -------------------------------------------------------------------- */
2875/* handlers */
2876
2877static void
2878expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2879 int data_len)
2880{
2881 PyObject* key;
2882 PyObject* value;
2883 PyObject* res;
2884
2885 if (data_len < 2 || data_in[0] != '&')
2886 return;
2887
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002888 if (PyErr_Occurred())
2889 return;
2890
Neal Norwitz0269b912007-08-08 06:56:02 +00002891 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002892 if (!key)
2893 return;
2894
2895 value = PyDict_GetItem(self->entity, key);
2896
2897 if (value) {
2898 if (TreeBuilder_CheckExact(self->target))
2899 res = treebuilder_handle_data(
2900 (TreeBuilderObject*) self->target, value
2901 );
2902 else if (self->handle_data)
2903 res = PyObject_CallFunction(self->handle_data, "O", value);
2904 else
2905 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002906 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002907 } else if (!PyErr_Occurred()) {
2908 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002909 char message[128] = "undefined entity ";
2910 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002911 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002912 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002913 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002914 EXPAT(GetErrorColumnNumber)(self->parser),
2915 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002916 );
2917 }
2918
2919 Py_DECREF(key);
2920}
2921
2922static void
2923expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2924 const XML_Char **attrib_in)
2925{
2926 PyObject* res;
2927 PyObject* tag;
2928 PyObject* attrib;
2929 int ok;
2930
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002931 if (PyErr_Occurred())
2932 return;
2933
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002934 /* tag name */
2935 tag = makeuniversal(self, tag_in);
2936 if (!tag)
2937 return; /* parser will look for errors */
2938
2939 /* attributes */
2940 if (attrib_in[0]) {
2941 attrib = PyDict_New();
2942 if (!attrib)
2943 return;
2944 while (attrib_in[0] && attrib_in[1]) {
2945 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002946 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002947 if (!key || !value) {
2948 Py_XDECREF(value);
2949 Py_XDECREF(key);
2950 Py_DECREF(attrib);
2951 return;
2952 }
2953 ok = PyDict_SetItem(attrib, key, value);
2954 Py_DECREF(value);
2955 Py_DECREF(key);
2956 if (ok < 0) {
2957 Py_DECREF(attrib);
2958 return;
2959 }
2960 attrib_in += 2;
2961 }
2962 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002963 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002964 attrib = PyDict_New();
2965 if (!attrib)
2966 return;
2967 }
2968
2969 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970 /* shortcut */
2971 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2972 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002973 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002974 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002975 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002976 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002977 res = NULL;
2978
2979 Py_DECREF(tag);
2980 Py_DECREF(attrib);
2981
2982 Py_XDECREF(res);
2983}
2984
2985static void
2986expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2987 int data_len)
2988{
2989 PyObject* data;
2990 PyObject* res;
2991
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002992 if (PyErr_Occurred())
2993 return;
2994
Neal Norwitz0269b912007-08-08 06:56:02 +00002995 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002996 if (!data)
2997 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002998
2999 if (TreeBuilder_CheckExact(self->target))
3000 /* shortcut */
3001 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3002 else if (self->handle_data)
3003 res = PyObject_CallFunction(self->handle_data, "O", data);
3004 else
3005 res = NULL;
3006
3007 Py_DECREF(data);
3008
3009 Py_XDECREF(res);
3010}
3011
3012static void
3013expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3014{
3015 PyObject* tag;
3016 PyObject* res = NULL;
3017
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003018 if (PyErr_Occurred())
3019 return;
3020
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003021 if (TreeBuilder_CheckExact(self->target))
3022 /* shortcut */
3023 /* the standard tree builder doesn't look at the end tag */
3024 res = treebuilder_handle_end(
3025 (TreeBuilderObject*) self->target, Py_None
3026 );
3027 else if (self->handle_end) {
3028 tag = makeuniversal(self, tag_in);
3029 if (tag) {
3030 res = PyObject_CallFunction(self->handle_end, "O", tag);
3031 Py_DECREF(tag);
3032 }
3033 }
3034
3035 Py_XDECREF(res);
3036}
3037
3038static void
3039expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3040 const XML_Char *uri)
3041{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003042 PyObject* sprefix = NULL;
3043 PyObject* suri = NULL;
3044
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003045 if (PyErr_Occurred())
3046 return;
3047
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003048 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
3049 if (!suri)
3050 return;
3051
3052 if (prefix)
3053 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3054 else
3055 sprefix = PyUnicode_FromString("");
3056 if (!sprefix) {
3057 Py_DECREF(suri);
3058 return;
3059 }
3060
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003061 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003062 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003063 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003064
3065 Py_DECREF(sprefix);
3066 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003067}
3068
3069static void
3070expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3071{
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003072 if (PyErr_Occurred())
3073 return;
3074
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003075 treebuilder_handle_namespace(
3076 (TreeBuilderObject*) self->target, 0, NULL, NULL
3077 );
3078}
3079
3080static void
3081expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3082{
3083 PyObject* comment;
3084 PyObject* res;
3085
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003086 if (PyErr_Occurred())
3087 return;
3088
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003089 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003090 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003091 if (comment) {
3092 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3093 Py_XDECREF(res);
3094 Py_DECREF(comment);
3095 }
3096 }
3097}
3098
Eli Bendersky45839902013-01-13 05:14:47 -08003099static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003100expat_start_doctype_handler(XMLParserObject *self,
3101 const XML_Char *doctype_name,
3102 const XML_Char *sysid,
3103 const XML_Char *pubid,
3104 int has_internal_subset)
3105{
3106 PyObject *self_pyobj = (PyObject *)self;
3107 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3108 PyObject *parser_doctype = NULL;
3109 PyObject *res = NULL;
3110
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003111 if (PyErr_Occurred())
3112 return;
3113
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003114 doctype_name_obj = makeuniversal(self, doctype_name);
3115 if (!doctype_name_obj)
3116 return;
3117
3118 if (sysid) {
3119 sysid_obj = makeuniversal(self, sysid);
3120 if (!sysid_obj) {
3121 Py_DECREF(doctype_name_obj);
3122 return;
3123 }
3124 } else {
3125 Py_INCREF(Py_None);
3126 sysid_obj = Py_None;
3127 }
3128
3129 if (pubid) {
3130 pubid_obj = makeuniversal(self, pubid);
3131 if (!pubid_obj) {
3132 Py_DECREF(doctype_name_obj);
3133 Py_DECREF(sysid_obj);
3134 return;
3135 }
3136 } else {
3137 Py_INCREF(Py_None);
3138 pubid_obj = Py_None;
3139 }
3140
3141 /* If the target has a handler for doctype, call it. */
3142 if (self->handle_doctype) {
3143 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3144 doctype_name_obj, pubid_obj, sysid_obj);
3145 Py_CLEAR(res);
3146 }
3147
3148 /* Now see if the parser itself has a doctype method. If yes and it's
3149 * a subclass, call it but warn about deprecation. If it's not a subclass
3150 * (i.e. vanilla XMLParser), do nothing.
3151 */
3152 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3153 if (parser_doctype) {
3154 if (!XMLParser_CheckExact(self_pyobj)) {
3155 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3156 "This method of XMLParser is deprecated. Define"
3157 " doctype() method on the TreeBuilder target.",
3158 1) < 0) {
3159 goto clear;
3160 }
3161 res = PyObject_CallFunction(parser_doctype, "OOO",
3162 doctype_name_obj, pubid_obj, sysid_obj);
3163 Py_CLEAR(res);
3164 }
3165 }
3166
3167clear:
3168 Py_XDECREF(parser_doctype);
3169 Py_DECREF(doctype_name_obj);
3170 Py_DECREF(pubid_obj);
3171 Py_DECREF(sysid_obj);
3172}
3173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003174static void
3175expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3176 const XML_Char* data_in)
3177{
3178 PyObject* target;
3179 PyObject* data;
3180 PyObject* res;
3181
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003182 if (PyErr_Occurred())
3183 return;
3184
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003185 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003186 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3187 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003188 if (target && data) {
3189 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3190 Py_XDECREF(res);
3191 Py_DECREF(data);
3192 Py_DECREF(target);
3193 } else {
3194 Py_XDECREF(data);
3195 Py_XDECREF(target);
3196 }
3197 }
3198}
3199
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003200/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003201
Eli Bendersky52467b12012-06-01 07:13:08 +03003202static PyObject *
3203xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003204{
Eli Bendersky52467b12012-06-01 07:13:08 +03003205 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3206 if (self) {
3207 self->parser = NULL;
3208 self->target = self->entity = self->names = NULL;
3209 self->handle_start = self->handle_data = self->handle_end = NULL;
3210 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003211 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003212 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003213 return (PyObject *)self;
3214}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003215
Eli Bendersky52467b12012-06-01 07:13:08 +03003216static int
3217xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3218{
3219 XMLParserObject *self_xp = (XMLParserObject *)self;
3220 PyObject *target = NULL, *html = NULL;
3221 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003222 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003223
Eli Bendersky52467b12012-06-01 07:13:08 +03003224 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3225 &html, &target, &encoding)) {
3226 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003227 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003228
Eli Bendersky52467b12012-06-01 07:13:08 +03003229 self_xp->entity = PyDict_New();
3230 if (!self_xp->entity)
3231 return -1;
3232
3233 self_xp->names = PyDict_New();
3234 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003235 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003236 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003237 }
3238
Eli Bendersky52467b12012-06-01 07:13:08 +03003239 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3240 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003241 Py_CLEAR(self_xp->entity);
3242 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003243 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003244 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003245 }
3246
Eli Bendersky52467b12012-06-01 07:13:08 +03003247 if (target) {
3248 Py_INCREF(target);
3249 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003250 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003251 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003252 Py_CLEAR(self_xp->entity);
3253 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003254 EXPAT(ParserFree)(self_xp->parser);
3255 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003256 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003257 }
3258 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003259
Eli Bendersky52467b12012-06-01 07:13:08 +03003260 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3261 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3262 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3263 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3264 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3265 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003266 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003267
3268 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003269
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003270 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003271 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003272 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003273 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274 (XML_StartElementHandler) expat_start_handler,
3275 (XML_EndElementHandler) expat_end_handler
3276 );
3277 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003278 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003279 (XML_DefaultHandler) expat_default_handler
3280 );
3281 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003282 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003283 (XML_CharacterDataHandler) expat_data_handler
3284 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003285 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003287 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288 (XML_CommentHandler) expat_comment_handler
3289 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003290 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003291 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003292 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003293 (XML_ProcessingInstructionHandler) expat_pi_handler
3294 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003295 EXPAT(SetStartDoctypeDeclHandler)(
3296 self_xp->parser,
3297 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3298 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003299 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003300 self_xp->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003301 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003302 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003303
Eli Bendersky52467b12012-06-01 07:13:08 +03003304 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003305}
3306
Eli Bendersky52467b12012-06-01 07:13:08 +03003307static int
3308xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3309{
3310 Py_VISIT(self->handle_close);
3311 Py_VISIT(self->handle_pi);
3312 Py_VISIT(self->handle_comment);
3313 Py_VISIT(self->handle_end);
3314 Py_VISIT(self->handle_data);
3315 Py_VISIT(self->handle_start);
3316
3317 Py_VISIT(self->target);
3318 Py_VISIT(self->entity);
3319 Py_VISIT(self->names);
3320
3321 return 0;
3322}
3323
3324static int
3325xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003326{
3327 EXPAT(ParserFree)(self->parser);
3328
Antoine Pitrouc1948842012-10-01 23:40:37 +02003329 Py_CLEAR(self->handle_close);
3330 Py_CLEAR(self->handle_pi);
3331 Py_CLEAR(self->handle_comment);
3332 Py_CLEAR(self->handle_end);
3333 Py_CLEAR(self->handle_data);
3334 Py_CLEAR(self->handle_start);
3335 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003336
Antoine Pitrouc1948842012-10-01 23:40:37 +02003337 Py_CLEAR(self->target);
3338 Py_CLEAR(self->entity);
3339 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003340
Eli Bendersky52467b12012-06-01 07:13:08 +03003341 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003342}
3343
Eli Bendersky52467b12012-06-01 07:13:08 +03003344static void
3345xmlparser_dealloc(XMLParserObject* self)
3346{
3347 PyObject_GC_UnTrack(self);
3348 xmlparser_gc_clear(self);
3349 Py_TYPE(self)->tp_free((PyObject *)self);
3350}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003351
3352LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003353expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003354{
3355 int ok;
3356
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003357 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003358 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3359
3360 if (PyErr_Occurred())
3361 return NULL;
3362
3363 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003364 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003365 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003366 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003367 EXPAT(GetErrorColumnNumber)(self->parser),
3368 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003369 );
3370 return NULL;
3371 }
3372
3373 Py_RETURN_NONE;
3374}
3375
3376static PyObject*
3377xmlparser_close(XMLParserObject* self, PyObject* args)
3378{
3379 /* end feeding data to parser */
3380
3381 PyObject* res;
3382 if (!PyArg_ParseTuple(args, ":close"))
3383 return NULL;
3384
3385 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003386 if (!res)
3387 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003388
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003389 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003390 Py_DECREF(res);
3391 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003392 }
3393 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003394 Py_DECREF(res);
3395 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003396 }
3397 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003398 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003399 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003400}
3401
3402static PyObject*
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003403xmlparser_feed(XMLParserObject* self, PyObject* arg)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003404{
3405 /* feed data to parser */
3406
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003407 if (PyUnicode_Check(arg)) {
3408 Py_ssize_t data_len;
3409 const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len);
3410 if (data == NULL)
3411 return NULL;
3412 if (data_len > INT_MAX) {
3413 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3414 return NULL;
3415 }
3416 /* Explicitly set UTF-8 encoding. Return code ignored. */
3417 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3418 return expat_parse(self, data, (int)data_len, 0);
3419 }
3420 else {
3421 Py_buffer view;
3422 PyObject *res;
3423 if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0)
3424 return NULL;
3425 if (view.len > INT_MAX) {
3426 PyBuffer_Release(&view);
3427 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3428 return NULL;
3429 }
3430 res = expat_parse(self, view.buf, (int)view.len, 0);
3431 PyBuffer_Release(&view);
3432 return res;
3433 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003434}
3435
3436static PyObject*
Eli Benderskya3699232013-05-19 18:47:23 -07003437xmlparser_parse_whole(XMLParserObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003438{
Eli Benderskya3699232013-05-19 18:47:23 -07003439 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003440 PyObject* reader;
3441 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003442 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003443 PyObject* res;
3444
3445 PyObject* fileobj;
3446 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3447 return NULL;
3448
3449 reader = PyObject_GetAttrString(fileobj, "read");
3450 if (!reader)
3451 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003452
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003453 /* read from open file object */
3454 for (;;) {
3455
3456 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3457
3458 if (!buffer) {
3459 /* read failed (e.g. due to KeyboardInterrupt) */
3460 Py_DECREF(reader);
3461 return NULL;
3462 }
3463
Eli Benderskyf996e772012-03-16 05:53:30 +02003464 if (PyUnicode_CheckExact(buffer)) {
3465 /* A unicode object is encoded into bytes using UTF-8 */
3466 if (PyUnicode_GET_SIZE(buffer) == 0) {
3467 Py_DECREF(buffer);
3468 break;
3469 }
3470 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003471 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003472 if (!temp) {
3473 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003474 Py_DECREF(reader);
3475 return NULL;
3476 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003477 buffer = temp;
3478 }
3479 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003480 Py_DECREF(buffer);
3481 break;
3482 }
3483
3484 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003485 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003486 );
3487
3488 Py_DECREF(buffer);
3489
3490 if (!res) {
3491 Py_DECREF(reader);
3492 return NULL;
3493 }
3494 Py_DECREF(res);
3495
3496 }
3497
3498 Py_DECREF(reader);
3499
3500 res = expat_parse(self, "", 0, 1);
3501
3502 if (res && TreeBuilder_CheckExact(self->target)) {
3503 Py_DECREF(res);
3504 return treebuilder_done((TreeBuilderObject*) self->target);
3505 }
3506
3507 return res;
3508}
3509
3510static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003511xmlparser_doctype(XMLParserObject *self, PyObject *args)
3512{
3513 Py_RETURN_NONE;
3514}
3515
3516static PyObject*
3517xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003518{
3519 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003520 Py_ssize_t i, seqlen;
3521 TreeBuilderObject *target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003522
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003523 PyObject *events_queue;
3524 PyObject *events_to_report = Py_None;
3525 PyObject *events_seq;
3526 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events_queue,
3527 &events_to_report))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003528 return NULL;
3529
3530 if (!TreeBuilder_CheckExact(self->target)) {
3531 PyErr_SetString(
3532 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003533 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003534 "targets"
3535 );
3536 return NULL;
3537 }
3538
3539 target = (TreeBuilderObject*) self->target;
3540
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003541 Py_INCREF(events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003542 Py_XDECREF(target->events);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003543 target->events = events_queue;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003544
3545 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003546 Py_CLEAR(target->start_event_obj);
3547 Py_CLEAR(target->end_event_obj);
3548 Py_CLEAR(target->start_ns_event_obj);
3549 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003550
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003551 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003552 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003553 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003554 Py_RETURN_NONE;
3555 }
3556
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003557 if (!(events_seq = PySequence_Fast(events_to_report,
3558 "events must be a sequence"))) {
3559 return NULL;
3560 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003561
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003562 seqlen = PySequence_Size(events_seq);
3563 for (i = 0; i < seqlen; ++i) {
3564 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3565 char *event_name = NULL;
3566 if (PyUnicode_Check(event_name_obj)) {
3567 event_name = _PyUnicode_AsString(event_name_obj);
3568 } else if (PyBytes_Check(event_name_obj)) {
3569 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003570 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003571
3572 if (event_name == NULL) {
3573 Py_DECREF(events_seq);
3574 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3575 return NULL;
3576 } else if (strcmp(event_name, "start") == 0) {
3577 Py_INCREF(event_name_obj);
3578 target->start_event_obj = event_name_obj;
3579 } else if (strcmp(event_name, "end") == 0) {
3580 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003581 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003582 target->end_event_obj = event_name_obj;
3583 } else if (strcmp(event_name, "start-ns") == 0) {
3584 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003585 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003586 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003587 EXPAT(SetNamespaceDeclHandler)(
3588 self->parser,
3589 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3590 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3591 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003592 } else if (strcmp(event_name, "end-ns") == 0) {
3593 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003594 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003595 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003596 EXPAT(SetNamespaceDeclHandler)(
3597 self->parser,
3598 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3599 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3600 );
3601 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003602 Py_DECREF(events_seq);
3603 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003604 return NULL;
3605 }
3606 }
3607
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003608 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003609 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003610}
3611
3612static PyMethodDef xmlparser_methods[] = {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003613 {"feed", (PyCFunction) xmlparser_feed, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003614 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
Eli Benderskya3699232013-05-19 18:47:23 -07003615 {"_parse_whole", (PyCFunction) xmlparser_parse_whole, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003616 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003617 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003618 {NULL, NULL}
3619};
3620
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003621static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003622xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003623{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003624 if (PyUnicode_Check(nameobj)) {
3625 PyObject* res;
3626 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3627 res = self->entity;
3628 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3629 res = self->target;
3630 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3631 return PyUnicode_FromFormat(
3632 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003633 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003634 }
3635 else
3636 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003637
Alexander Belopolskye239d232010-12-08 23:31:48 +00003638 Py_INCREF(res);
3639 return res;
3640 }
3641 generic:
3642 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003643}
3644
Neal Norwitz227b5332006-03-22 09:28:35 +00003645static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003646 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003647 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003648 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003649 (destructor)xmlparser_dealloc, /* tp_dealloc */
3650 0, /* tp_print */
3651 0, /* tp_getattr */
3652 0, /* tp_setattr */
3653 0, /* tp_reserved */
3654 0, /* tp_repr */
3655 0, /* tp_as_number */
3656 0, /* tp_as_sequence */
3657 0, /* tp_as_mapping */
3658 0, /* tp_hash */
3659 0, /* tp_call */
3660 0, /* tp_str */
3661 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3662 0, /* tp_setattro */
3663 0, /* tp_as_buffer */
3664 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3665 /* tp_flags */
3666 0, /* tp_doc */
3667 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3668 (inquiry)xmlparser_gc_clear, /* tp_clear */
3669 0, /* tp_richcompare */
3670 0, /* tp_weaklistoffset */
3671 0, /* tp_iter */
3672 0, /* tp_iternext */
3673 xmlparser_methods, /* tp_methods */
3674 0, /* tp_members */
3675 0, /* tp_getset */
3676 0, /* tp_base */
3677 0, /* tp_dict */
3678 0, /* tp_descr_get */
3679 0, /* tp_descr_set */
3680 0, /* tp_dictoffset */
3681 (initproc)xmlparser_init, /* tp_init */
3682 PyType_GenericAlloc, /* tp_alloc */
3683 xmlparser_new, /* tp_new */
3684 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003685};
3686
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003687/* ==================================================================== */
3688/* python module interface */
3689
3690static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003691 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003692 {NULL, NULL}
3693};
3694
Martin v. Löwis1a214512008-06-11 05:26:20 +00003695
Eli Bendersky532d03e2013-08-10 08:00:39 -07003696static struct PyModuleDef elementtreemodule = {
3697 PyModuleDef_HEAD_INIT,
3698 "_elementtree",
3699 NULL,
3700 sizeof(elementtreestate),
3701 _functions,
3702 NULL,
3703 elementtree_traverse,
3704 elementtree_clear,
3705 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003706};
3707
Neal Norwitzf6657e62006-12-28 04:47:50 +00003708PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003709PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003710{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003711 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003712 elementtreestate *st;
3713
3714 m = PyState_FindModule(&elementtreemodule);
3715 if (m) {
3716 Py_INCREF(m);
3717 return m;
3718 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003719
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003720 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003721 if (PyType_Ready(&ElementIter_Type) < 0)
3722 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003723 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003724 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003725 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003726 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003727 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003728 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003729
Eli Bendersky532d03e2013-08-10 08:00:39 -07003730 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003731 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003732 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003733 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003734
Eli Bendersky828efde2012-04-05 05:40:58 +03003735 if (!(temp = PyImport_ImportModule("copy")))
3736 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003737 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003738 Py_XDECREF(temp);
3739
Eli Bendersky532d03e2013-08-10 08:00:39 -07003740 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003741 return NULL;
3742
Eli Bendersky20d41742012-06-01 09:48:37 +03003743 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003744 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3745 if (expat_capi) {
3746 /* check that it's usable */
3747 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3748 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3749 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3750 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003751 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003752 PyErr_SetString(PyExc_ImportError,
3753 "pyexpat version is incompatible");
3754 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003755 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003756 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003757 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003758 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003759
Eli Bendersky532d03e2013-08-10 08:00:39 -07003760 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003761 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003762 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003763 Py_INCREF(st->parseerror_obj);
3764 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003765
Eli Bendersky092af1f2012-03-04 07:14:03 +02003766 Py_INCREF((PyObject *)&Element_Type);
3767 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3768
Eli Bendersky58d548d2012-05-29 15:45:16 +03003769 Py_INCREF((PyObject *)&TreeBuilder_Type);
3770 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3771
Eli Bendersky52467b12012-06-01 07:13:08 +03003772 Py_INCREF((PyObject *)&XMLParser_Type);
3773 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003774
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003775 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003776}