blob: d488f01b803a0d28095d6458eaa9622b17921daf [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
14#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030015#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000017/* -------------------------------------------------------------------- */
18/* configuration */
19
/* Number of child slots stored inline in an element's extra record;
   an element with no more children than this needs no additional
   memory allocation for them. */
#define STATIC_CHILDREN 4
23
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
28
29/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010030 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000031 that the number of children should be an even number, at least on
32 32-bit platforms. */
33
34/* -------------------------------------------------------------------- */
35
/* Optional allocation tracing.  With the "#if 0" branch enabled, ALLOC
   and RELEASE keep a running byte count in `memory` and print it with
   the given comment; otherwise both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
46
/* compiler tweaks: LOCAL(type) introduces a file-private definition of
   the given type; with MSVC it also asks for inlining and the
   __fastcall calling convention. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
53
/* The 'join' flag is kept in the lowest bit of the text and tail
   pointers.  Every use of text or tail as an object pointer must go
   through JOIN_OBJ; see the comments in the ElementObject definition
   for more info.

     JOIN_OBJ(p)        the object pointer with the flag bit masked off
     JOIN_GET(p)        the flag bit of p (0 or 1)
     JOIN_SET(p, flag)  p's object pointer with `flag` or-ed into bit 0 */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061
Ronald Oussoren138d0802013-07-19 11:11:25 +020062/* Types defined by this extension */
63static PyTypeObject Element_Type;
64static PyTypeObject ElementIter_Type;
65static PyTypeObject TreeBuilder_Type;
66static PyTypeObject XMLParser_Type;
67
68
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000069/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000070static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000071static PyObject* elementtree_deepcopy_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000072static PyObject* elementpath_obj;
73
74/* helpers */
75
76LOCAL(PyObject*)
77deepcopy(PyObject* object, PyObject* memo)
78{
79 /* do a deep copy of the given object */
80
81 PyObject* args;
82 PyObject* result;
83
84 if (!elementtree_deepcopy_obj) {
85 PyErr_SetString(
86 PyExc_RuntimeError,
87 "deepcopy helper not found"
88 );
89 return NULL;
90 }
91
Antoine Pitrouc1948842012-10-01 23:40:37 +020092 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000093 if (!args)
94 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000095 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000096 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000097 return result;
98}
99
100LOCAL(PyObject*)
101list_join(PyObject* list)
102{
103 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000104 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000105 PyObject* result;
106
Antoine Pitrouc1948842012-10-01 23:40:37 +0200107 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000108 if (!joiner)
109 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200110 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000111 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200112 if (result)
113 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000114 return result;
115}
116
Eli Bendersky48d358b2012-05-30 17:57:50 +0300117/* Is the given object an empty dictionary?
118*/
119static int
120is_empty_dict(PyObject *obj)
121{
122 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
123}
124
125
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200127/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129typedef struct {
130
131 /* attributes (a dictionary object), or None if no attributes */
132 PyObject* attrib;
133
134 /* child elements */
135 int length; /* actual number of items */
136 int allocated; /* allocated items */
137
138 /* this either points to _children or to a malloced buffer */
139 PyObject* *children;
140
141 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100142
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143} ElementObjectExtra;
144
145typedef struct {
146 PyObject_HEAD
147
148 /* element tag (a string). */
149 PyObject* tag;
150
151 /* text before first child. note that this is a tagged pointer;
152 use JOIN_OBJ to get the object pointer. the join flag is used
153 to distinguish lists created by the tree builder from lists
154 assigned to the attribute by application code; the former
155 should be joined before being returned to the user, the latter
156 should be left intact. */
157 PyObject* text;
158
159 /* text after this element, in parent. note that this is a tagged
160 pointer; use JOIN_OBJ to get the object pointer. */
161 PyObject* tail;
162
163 ElementObjectExtra* extra;
164
Eli Benderskyebf37a22012-04-03 22:02:37 +0300165 PyObject *weakreflist; /* For tp_weaklistoffset */
166
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000167} ElementObject;
168
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000169
/* true only when op's type is exactly Element_Type */
#define Element_CheckExact(op) (&Element_Type == Py_TYPE(op))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000171
172/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200173/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000174
175LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200176create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000177{
178 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200179 if (!self->extra) {
180 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200182 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183
184 if (!attrib)
185 attrib = Py_None;
186
187 Py_INCREF(attrib);
188 self->extra->attrib = attrib;
189
190 self->extra->length = 0;
191 self->extra->allocated = STATIC_CHILDREN;
192 self->extra->children = self->extra->_children;
193
194 return 0;
195}
196
197LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200198dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000199{
Eli Bendersky08b85292012-04-04 15:55:07 +0300200 ElementObjectExtra *myextra;
201 int i;
202
Eli Benderskyebf37a22012-04-03 22:02:37 +0300203 if (!self->extra)
204 return;
205
206 /* Avoid DECREFs calling into this code again (cycles, etc.)
207 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300208 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300209 self->extra = NULL;
210
211 Py_DECREF(myextra->attrib);
212
Eli Benderskyebf37a22012-04-03 22:02:37 +0300213 for (i = 0; i < myextra->length; i++)
214 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215
Eli Benderskyebf37a22012-04-03 22:02:37 +0300216 if (myextra->children != myextra->_children)
217 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000218
Eli Benderskyebf37a22012-04-03 22:02:37 +0300219 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000220}
221
Eli Bendersky092af1f2012-03-04 07:14:03 +0200222/* Convenience internal function to create new Element objects with the given
223 * tag and attributes.
224*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000225LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200226create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227{
228 ElementObject* self;
229
Eli Bendersky0192ba32012-03-30 16:38:33 +0300230 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000231 if (self == NULL)
232 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000233 self->extra = NULL;
234
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235 Py_INCREF(tag);
236 self->tag = tag;
237
238 Py_INCREF(Py_None);
239 self->text = Py_None;
240
241 Py_INCREF(Py_None);
242 self->tail = Py_None;
243
Eli Benderskyebf37a22012-04-03 22:02:37 +0300244 self->weakreflist = NULL;
245
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200246 ALLOC(sizeof(ElementObject), "create element");
247 PyObject_GC_Track(self);
248
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200249 if (attrib != Py_None && !is_empty_dict(attrib)) {
250 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200251 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200252 return NULL;
253 }
254 }
255
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256 return (PyObject*) self;
257}
258
Eli Bendersky092af1f2012-03-04 07:14:03 +0200259static PyObject *
260element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
261{
262 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
263 if (e != NULL) {
264 Py_INCREF(Py_None);
265 e->tag = Py_None;
266
267 Py_INCREF(Py_None);
268 e->text = Py_None;
269
270 Py_INCREF(Py_None);
271 e->tail = Py_None;
272
273 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300274 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200275 }
276 return (PyObject *)e;
277}
278
Eli Bendersky737b1732012-05-29 06:02:56 +0300279/* Helper function for extracting the attrib dictionary from a keywords dict.
280 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800281 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300282 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700283 *
284 * Return a dictionary with the content of kwds merged into the content of
285 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300286 */
287static PyObject*
288get_attrib_from_keywords(PyObject *kwds)
289{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700290 PyObject *attrib_str = PyUnicode_FromString("attrib");
291 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300292
293 if (attrib) {
294 /* If attrib was found in kwds, copy its value and remove it from
295 * kwds
296 */
297 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700298 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300299 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
300 Py_TYPE(attrib)->tp_name);
301 return NULL;
302 }
303 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700304 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300305 } else {
306 attrib = PyDict_New();
307 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700308
309 Py_DECREF(attrib_str);
310
311 /* attrib can be NULL if PyDict_New failed */
312 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200313 if (PyDict_Update(attrib, kwds) < 0)
314 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300315 return attrib;
316}
317
Eli Bendersky092af1f2012-03-04 07:14:03 +0200318static int
319element_init(PyObject *self, PyObject *args, PyObject *kwds)
320{
321 PyObject *tag;
322 PyObject *tmp;
323 PyObject *attrib = NULL;
324 ElementObject *self_elem;
325
326 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
327 return -1;
328
Eli Bendersky737b1732012-05-29 06:02:56 +0300329 if (attrib) {
330 /* attrib passed as positional arg */
331 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200332 if (!attrib)
333 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300334 if (kwds) {
335 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200336 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300337 return -1;
338 }
339 }
340 } else if (kwds) {
341 /* have keywords args */
342 attrib = get_attrib_from_keywords(kwds);
343 if (!attrib)
344 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200345 }
346
347 self_elem = (ElementObject *)self;
348
Antoine Pitrouc1948842012-10-01 23:40:37 +0200349 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200350 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200351 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200352 return -1;
353 }
354 }
355
Eli Bendersky48d358b2012-05-30 17:57:50 +0300356 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200357 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200358
359 /* Replace the objects already pointed to by tag, text and tail. */
360 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200361 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200362 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200363 Py_DECREF(tmp);
364
365 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200366 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200367 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200368 Py_DECREF(JOIN_OBJ(tmp));
369
370 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200372 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200373 Py_DECREF(JOIN_OBJ(tmp));
374
375 return 0;
376}
377
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000378LOCAL(int)
379element_resize(ElementObject* self, int extra)
380{
381 int size;
382 PyObject* *children;
383
384 /* make sure self->children can hold the given number of extra
385 elements. set an exception and return -1 if allocation failed */
386
Victor Stinner5f0af232013-07-11 23:01:36 +0200387 if (!self->extra) {
388 if (create_extra(self, NULL) < 0)
389 return -1;
390 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000391
392 size = self->extra->length + extra;
393
394 if (size > self->extra->allocated) {
395 /* use Python 2.4's list growth strategy */
396 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000397 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100398 * which needs at least 4 bytes.
399 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000400 * be safe.
401 */
402 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000403 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000404 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100405 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000406 * false alarm always assume at least one child to be safe.
407 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000408 children = PyObject_Realloc(self->extra->children,
409 size * sizeof(PyObject*));
410 if (!children)
411 goto nomemory;
412 } else {
413 children = PyObject_Malloc(size * sizeof(PyObject*));
414 if (!children)
415 goto nomemory;
416 /* copy existing children from static area to malloc buffer */
417 memcpy(children, self->extra->children,
418 self->extra->length * sizeof(PyObject*));
419 }
420 self->extra->children = children;
421 self->extra->allocated = size;
422 }
423
424 return 0;
425
426 nomemory:
427 PyErr_NoMemory();
428 return -1;
429}
430
431LOCAL(int)
432element_add_subelement(ElementObject* self, PyObject* element)
433{
434 /* add a child element to a parent */
435
436 if (element_resize(self, 1) < 0)
437 return -1;
438
439 Py_INCREF(element);
440 self->extra->children[self->extra->length] = element;
441
442 self->extra->length++;
443
444 return 0;
445}
446
447LOCAL(PyObject*)
448element_get_attrib(ElementObject* self)
449{
450 /* return borrowed reference to attrib dictionary */
451 /* note: this function assumes that the extra section exists */
452
453 PyObject* res = self->extra->attrib;
454
455 if (res == Py_None) {
456 /* create missing dictionary */
457 res = PyDict_New();
458 if (!res)
459 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200460 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000461 self->extra->attrib = res;
462 }
463
464 return res;
465}
466
467LOCAL(PyObject*)
468element_get_text(ElementObject* self)
469{
470 /* return borrowed reference to text attribute */
471
472 PyObject* res = self->text;
473
474 if (JOIN_GET(res)) {
475 res = JOIN_OBJ(res);
476 if (PyList_CheckExact(res)) {
477 res = list_join(res);
478 if (!res)
479 return NULL;
480 self->text = res;
481 }
482 }
483
484 return res;
485}
486
487LOCAL(PyObject*)
488element_get_tail(ElementObject* self)
489{
490 /* return borrowed reference to text attribute */
491
492 PyObject* res = self->tail;
493
494 if (JOIN_GET(res)) {
495 res = JOIN_OBJ(res);
496 if (PyList_CheckExact(res)) {
497 res = list_join(res);
498 if (!res)
499 return NULL;
500 self->tail = res;
501 }
502 }
503
504 return res;
505}
506
507static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300508subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000509{
510 PyObject* elem;
511
512 ElementObject* parent;
513 PyObject* tag;
514 PyObject* attrib = NULL;
515 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
516 &Element_Type, &parent, &tag,
517 &PyDict_Type, &attrib))
518 return NULL;
519
Eli Bendersky737b1732012-05-29 06:02:56 +0300520 if (attrib) {
521 /* attrib passed as positional arg */
522 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000523 if (!attrib)
524 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300525 if (kwds) {
526 if (PyDict_Update(attrib, kwds) < 0) {
527 return NULL;
528 }
529 }
530 } else if (kwds) {
531 /* have keyword args */
532 attrib = get_attrib_from_keywords(kwds);
533 if (!attrib)
534 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000535 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300536 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000537 Py_INCREF(Py_None);
538 attrib = Py_None;
539 }
540
Eli Bendersky092af1f2012-03-04 07:14:03 +0200541 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000542 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200543 if (elem == NULL)
544 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000545
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000546 if (element_add_subelement(parent, elem) < 0) {
547 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000548 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000549 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000550
551 return elem;
552}
553
Eli Bendersky0192ba32012-03-30 16:38:33 +0300554static int
555element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
556{
557 Py_VISIT(self->tag);
558 Py_VISIT(JOIN_OBJ(self->text));
559 Py_VISIT(JOIN_OBJ(self->tail));
560
561 if (self->extra) {
562 int i;
563 Py_VISIT(self->extra->attrib);
564
565 for (i = 0; i < self->extra->length; ++i)
566 Py_VISIT(self->extra->children[i]);
567 }
568 return 0;
569}
570
571static int
572element_gc_clear(ElementObject *self)
573{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300574 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300575
576 /* The following is like Py_CLEAR for self->text and self->tail, but
577 * written explicitily because the real pointers hide behind access
578 * macros.
579 */
580 if (self->text) {
581 PyObject *tmp = JOIN_OBJ(self->text);
582 self->text = NULL;
583 Py_DECREF(tmp);
584 }
585
586 if (self->tail) {
587 PyObject *tmp = JOIN_OBJ(self->tail);
588 self->tail = NULL;
589 Py_DECREF(tmp);
590 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300591
592 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300593 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300594 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300595 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300596 return 0;
597}
598
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599static void
600element_dealloc(ElementObject* self)
601{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300602 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300603
604 if (self->weakreflist != NULL)
605 PyObject_ClearWeakRefs((PyObject *) self);
606
Eli Bendersky0192ba32012-03-30 16:38:33 +0300607 /* element_gc_clear clears all references and deallocates extra
608 */
609 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000610
611 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200612 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000613}
614
615/* -------------------------------------------------------------------- */
616/* methods (in alphabetical order) */
617
618static PyObject*
619element_append(ElementObject* self, PyObject* args)
620{
621 PyObject* element;
622 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
623 return NULL;
624
625 if (element_add_subelement(self, element) < 0)
626 return NULL;
627
628 Py_RETURN_NONE;
629}
630
631static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300632element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000633{
634 if (!PyArg_ParseTuple(args, ":clear"))
635 return NULL;
636
Eli Benderskyebf37a22012-04-03 22:02:37 +0300637 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000638
639 Py_INCREF(Py_None);
640 Py_DECREF(JOIN_OBJ(self->text));
641 self->text = Py_None;
642
643 Py_INCREF(Py_None);
644 Py_DECREF(JOIN_OBJ(self->tail));
645 self->tail = Py_None;
646
647 Py_RETURN_NONE;
648}
649
650static PyObject*
651element_copy(ElementObject* self, PyObject* args)
652{
653 int i;
654 ElementObject* element;
655
656 if (!PyArg_ParseTuple(args, ":__copy__"))
657 return NULL;
658
Eli Bendersky092af1f2012-03-04 07:14:03 +0200659 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000660 self->tag, (self->extra) ? self->extra->attrib : Py_None
661 );
662 if (!element)
663 return NULL;
664
665 Py_DECREF(JOIN_OBJ(element->text));
666 element->text = self->text;
667 Py_INCREF(JOIN_OBJ(element->text));
668
669 Py_DECREF(JOIN_OBJ(element->tail));
670 element->tail = self->tail;
671 Py_INCREF(JOIN_OBJ(element->tail));
672
673 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100674
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000675 if (element_resize(element, self->extra->length) < 0) {
676 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000677 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000678 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000679
680 for (i = 0; i < self->extra->length; i++) {
681 Py_INCREF(self->extra->children[i]);
682 element->extra->children[i] = self->extra->children[i];
683 }
684
685 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100686
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000687 }
688
689 return (PyObject*) element;
690}
691
692static PyObject*
693element_deepcopy(ElementObject* self, PyObject* args)
694{
695 int i;
696 ElementObject* element;
697 PyObject* tag;
698 PyObject* attrib;
699 PyObject* text;
700 PyObject* tail;
701 PyObject* id;
702
703 PyObject* memo;
704 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
705 return NULL;
706
707 tag = deepcopy(self->tag, memo);
708 if (!tag)
709 return NULL;
710
711 if (self->extra) {
712 attrib = deepcopy(self->extra->attrib, memo);
713 if (!attrib) {
714 Py_DECREF(tag);
715 return NULL;
716 }
717 } else {
718 Py_INCREF(Py_None);
719 attrib = Py_None;
720 }
721
Eli Bendersky092af1f2012-03-04 07:14:03 +0200722 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723
724 Py_DECREF(tag);
725 Py_DECREF(attrib);
726
727 if (!element)
728 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100729
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000730 text = deepcopy(JOIN_OBJ(self->text), memo);
731 if (!text)
732 goto error;
733 Py_DECREF(element->text);
734 element->text = JOIN_SET(text, JOIN_GET(self->text));
735
736 tail = deepcopy(JOIN_OBJ(self->tail), memo);
737 if (!tail)
738 goto error;
739 Py_DECREF(element->tail);
740 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
741
742 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100743
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000744 if (element_resize(element, self->extra->length) < 0)
745 goto error;
746
747 for (i = 0; i < self->extra->length; i++) {
748 PyObject* child = deepcopy(self->extra->children[i], memo);
749 if (!child) {
750 element->extra->length = i;
751 goto error;
752 }
753 element->extra->children[i] = child;
754 }
755
756 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100757
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000758 }
759
760 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200761 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000762 if (!id)
763 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764
765 i = PyDict_SetItem(memo, id, (PyObject*) element);
766
767 Py_DECREF(id);
768
769 if (i < 0)
770 goto error;
771
772 return (PyObject*) element;
773
774 error:
775 Py_DECREF(element);
776 return NULL;
777}
778
Martin v. Löwisbce16662012-06-17 10:41:22 +0200779static PyObject*
780element_sizeof(PyObject* _self, PyObject* args)
781{
782 ElementObject *self = (ElementObject*)_self;
783 Py_ssize_t result = sizeof(ElementObject);
784 if (self->extra) {
785 result += sizeof(ElementObjectExtra);
786 if (self->extra->children != self->extra->_children)
787 result += sizeof(PyObject*) * self->extra->allocated;
788 }
789 return PyLong_FromSsize_t(result);
790}
791
/* Keys of the state dictionary exchanged by getstate/setstate. */
#define PICKLED_TAG       "tag"
#define PICKLED_CHILDREN  "_children"
#define PICKLED_ATTRIB    "attrib"
#define PICKLED_TAIL      "tail"
#define PICKLED_TEXT      "text"
798
799/* __getstate__ returns a fabricated instance dict as in the pure-Python
800 * Element implementation, for interoperability/interchangeability. This
801 * makes the pure-Python implementation details an API, but (a) there aren't
802 * any unnecessary structures there; and (b) it buys compatibility with 3.2
803 * pickles. See issue #16076.
804 */
805static PyObject *
806element_getstate(ElementObject *self)
807{
808 int i, noattrib;
809 PyObject *instancedict = NULL, *children;
810
811 /* Build a list of children. */
812 children = PyList_New(self->extra ? self->extra->length : 0);
813 if (!children)
814 return NULL;
815 for (i = 0; i < PyList_GET_SIZE(children); i++) {
816 PyObject *child = self->extra->children[i];
817 Py_INCREF(child);
818 PyList_SET_ITEM(children, i, child);
819 }
820
821 /* Construct the state object. */
822 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
823 if (noattrib)
824 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
825 PICKLED_TAG, self->tag,
826 PICKLED_CHILDREN, children,
827 PICKLED_ATTRIB,
828 PICKLED_TEXT, self->text,
829 PICKLED_TAIL, self->tail);
830 else
831 instancedict = Py_BuildValue("{sOsOsOsOsO}",
832 PICKLED_TAG, self->tag,
833 PICKLED_CHILDREN, children,
834 PICKLED_ATTRIB, self->extra->attrib,
835 PICKLED_TEXT, self->text,
836 PICKLED_TAIL, self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800837 if (instancedict) {
838 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800839 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800840 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800841 else {
842 for (i = 0; i < PyList_GET_SIZE(children); i++)
843 Py_DECREF(PyList_GET_ITEM(children, i));
844 Py_DECREF(children);
845
846 return NULL;
847 }
848}
849
850static PyObject *
851element_setstate_from_attributes(ElementObject *self,
852 PyObject *tag,
853 PyObject *attrib,
854 PyObject *text,
855 PyObject *tail,
856 PyObject *children)
857{
858 Py_ssize_t i, nchildren;
859
860 if (!tag) {
861 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
862 return NULL;
863 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800864
865 Py_CLEAR(self->tag);
866 self->tag = tag;
867 Py_INCREF(self->tag);
868
869 Py_CLEAR(self->text);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800870 self->text = text ? text : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800871 Py_INCREF(self->text);
872
873 Py_CLEAR(self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800874 self->tail = tail ? tail : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800875 Py_INCREF(self->tail);
876
877 /* Handle ATTRIB and CHILDREN. */
878 if (!children && !attrib)
879 Py_RETURN_NONE;
880
881 /* Compute 'nchildren'. */
882 if (children) {
883 if (!PyList_Check(children)) {
884 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
885 return NULL;
886 }
887 nchildren = PyList_Size(children);
888 }
889 else {
890 nchildren = 0;
891 }
892
893 /* Allocate 'extra'. */
894 if (element_resize(self, nchildren)) {
895 return NULL;
896 }
897 assert(self->extra && self->extra->allocated >= nchildren);
898
899 /* Copy children */
900 for (i = 0; i < nchildren; i++) {
901 self->extra->children[i] = PyList_GET_ITEM(children, i);
902 Py_INCREF(self->extra->children[i]);
903 }
904
905 self->extra->length = nchildren;
906 self->extra->allocated = nchildren;
907
908 /* Stash attrib. */
909 if (attrib) {
910 Py_CLEAR(self->extra->attrib);
911 self->extra->attrib = attrib;
912 Py_INCREF(attrib);
913 }
914
915 Py_RETURN_NONE;
916}
917
918/* __setstate__ for Element instance from the Python implementation.
919 * 'state' should be the instance dict.
920 */
921static PyObject *
922element_setstate_from_Python(ElementObject *self, PyObject *state)
923{
924 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
925 PICKLED_TAIL, PICKLED_CHILDREN, 0};
926 PyObject *args;
927 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800928 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800929
Eli Bendersky698bdb22013-01-10 06:01:06 -0800930 tag = attrib = text = tail = children = NULL;
931 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800932 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800933 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800934
935 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
936 &attrib, &text, &tail, &children))
937 retval = element_setstate_from_attributes(self, tag, attrib, text,
938 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800939 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800940 retval = NULL;
941
942 Py_DECREF(args);
943 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800944}
945
946static PyObject *
947element_setstate(ElementObject *self, PyObject *state)
948{
949 if (!PyDict_CheckExact(state)) {
950 PyErr_Format(PyExc_TypeError,
951 "Don't know how to unpickle \"%.200R\" as an Element",
952 state);
953 return NULL;
954 }
955 else
956 return element_setstate_from_Python(self, state);
957}
958
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000959LOCAL(int)
960checkpath(PyObject* tag)
961{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000962 Py_ssize_t i;
963 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000964
965 /* check if a tag contains an xpath character */
966
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000967#define PATHCHAR(ch) \
968 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000969
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000970 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200971 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
972 void *data = PyUnicode_DATA(tag);
973 unsigned int kind = PyUnicode_KIND(tag);
974 for (i = 0; i < len; i++) {
975 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
976 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000977 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200978 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000979 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200980 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000981 return 1;
982 }
983 return 0;
984 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000985 if (PyBytes_Check(tag)) {
986 char *p = PyBytes_AS_STRING(tag);
987 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000988 if (p[i] == '{')
989 check = 0;
990 else if (p[i] == '}')
991 check = 1;
992 else if (check && PATHCHAR(p[i]))
993 return 1;
994 }
995 return 0;
996 }
997
998 return 1; /* unknown type; might be path expression */
999}
1000
1001static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001002element_extend(ElementObject* self, PyObject* args)
1003{
1004 PyObject* seq;
1005 Py_ssize_t i, seqlen = 0;
1006
1007 PyObject* seq_in;
1008 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1009 return NULL;
1010
1011 seq = PySequence_Fast(seq_in, "");
1012 if (!seq) {
1013 PyErr_Format(
1014 PyExc_TypeError,
1015 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1016 );
1017 return NULL;
1018 }
1019
1020 seqlen = PySequence_Size(seq);
1021 for (i = 0; i < seqlen; i++) {
1022 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001023 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1024 Py_DECREF(seq);
1025 PyErr_Format(
1026 PyExc_TypeError,
1027 "expected an Element, not \"%.200s\"",
1028 Py_TYPE(element)->tp_name);
1029 return NULL;
1030 }
1031
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001032 if (element_add_subelement(self, element) < 0) {
1033 Py_DECREF(seq);
1034 return NULL;
1035 }
1036 }
1037
1038 Py_DECREF(seq);
1039
1040 Py_RETURN_NONE;
1041}
1042
1043static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001044element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001045{
1046 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001047 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001048 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001049 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001050
Eli Bendersky737b1732012-05-29 06:02:56 +03001051 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1052 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001053 return NULL;
1054
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001055 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001056 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001057 return _PyObject_CallMethodId(
1058 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001059 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001060 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001061
1062 if (!self->extra)
1063 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001064
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001065 for (i = 0; i < self->extra->length; i++) {
1066 PyObject* item = self->extra->children[i];
1067 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001068 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001069 Py_INCREF(item);
1070 return item;
1071 }
1072 }
1073
1074 Py_RETURN_NONE;
1075}
1076
1077static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001078element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001079{
1080 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081 PyObject* tag;
1082 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001083 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001084 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001085 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001086
Eli Bendersky737b1732012-05-29 06:02:56 +03001087 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1088 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001089 return NULL;
1090
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001091 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001092 return _PyObject_CallMethodId(
1093 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001094 );
1095
1096 if (!self->extra) {
1097 Py_INCREF(default_value);
1098 return default_value;
1099 }
1100
1101 for (i = 0; i < self->extra->length; i++) {
1102 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +00001103 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
1104
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001105 PyObject* text = element_get_text(item);
1106 if (text == Py_None)
Eli Bendersky25771b32013-01-13 05:26:07 -08001107 return PyUnicode_New(0, 0);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001108 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001109 return text;
1110 }
1111 }
1112
1113 Py_INCREF(default_value);
1114 return default_value;
1115}
1116
1117static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001118element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001119{
1120 int i;
1121 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001122 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001123 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001124 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001125
Eli Bendersky737b1732012-05-29 06:02:56 +03001126 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1127 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001128 return NULL;
1129
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001130 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001131 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001132 return _PyObject_CallMethodId(
1133 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001134 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001135 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001136
1137 out = PyList_New(0);
1138 if (!out)
1139 return NULL;
1140
1141 if (!self->extra)
1142 return out;
1143
1144 for (i = 0; i < self->extra->length; i++) {
1145 PyObject* item = self->extra->children[i];
1146 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001147 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001148 if (PyList_Append(out, item) < 0) {
1149 Py_DECREF(out);
1150 return NULL;
1151 }
1152 }
1153 }
1154
1155 return out;
1156}
1157
1158static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001159element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001160{
1161 PyObject* tag;
1162 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001163 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001164 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001165
Eli Bendersky737b1732012-05-29 06:02:56 +03001166 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1167 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001168 return NULL;
1169
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001170 return _PyObject_CallMethodId(
1171 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001172 );
1173}
1174
1175static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001176element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001177{
1178 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001179 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180
1181 PyObject* key;
1182 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001183
1184 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1185 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001186 return NULL;
1187
1188 if (!self->extra || self->extra->attrib == Py_None)
1189 value = default_value;
1190 else {
1191 value = PyDict_GetItem(self->extra->attrib, key);
1192 if (!value)
1193 value = default_value;
1194 }
1195
1196 Py_INCREF(value);
1197 return value;
1198}
1199
1200static PyObject*
1201element_getchildren(ElementObject* self, PyObject* args)
1202{
1203 int i;
1204 PyObject* list;
1205
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001206 /* FIXME: report as deprecated? */
1207
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001208 if (!PyArg_ParseTuple(args, ":getchildren"))
1209 return NULL;
1210
1211 if (!self->extra)
1212 return PyList_New(0);
1213
1214 list = PyList_New(self->extra->length);
1215 if (!list)
1216 return NULL;
1217
1218 for (i = 0; i < self->extra->length; i++) {
1219 PyObject* item = self->extra->children[i];
1220 Py_INCREF(item);
1221 PyList_SET_ITEM(list, i, item);
1222 }
1223
1224 return list;
1225}
1226
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001227
Eli Bendersky64d11e62012-06-15 07:42:50 +03001228static PyObject *
1229create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1230
1231
1232static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001233element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001234{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001235 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001236 static char* kwlist[] = {"tag", 0};
1237
1238 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001239 return NULL;
1240
Eli Bendersky64d11e62012-06-15 07:42:50 +03001241 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001242}
1243
1244
1245static PyObject*
1246element_itertext(ElementObject* self, PyObject* args)
1247{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001248 if (!PyArg_ParseTuple(args, ":itertext"))
1249 return NULL;
1250
Eli Bendersky64d11e62012-06-15 07:42:50 +03001251 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001252}
1253
Eli Bendersky64d11e62012-06-15 07:42:50 +03001254
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001255static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001256element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001257{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001258 ElementObject* self = (ElementObject*) self_;
1259
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001260 if (!self->extra || index < 0 || index >= self->extra->length) {
1261 PyErr_SetString(
1262 PyExc_IndexError,
1263 "child index out of range"
1264 );
1265 return NULL;
1266 }
1267
1268 Py_INCREF(self->extra->children[index]);
1269 return self->extra->children[index];
1270}
1271
1272static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001273element_insert(ElementObject* self, PyObject* args)
1274{
1275 int i;
1276
1277 int index;
1278 PyObject* element;
1279 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1280 &Element_Type, &element))
1281 return NULL;
1282
Victor Stinner5f0af232013-07-11 23:01:36 +02001283 if (!self->extra) {
1284 if (create_extra(self, NULL) < 0)
1285 return NULL;
1286 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001287
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001288 if (index < 0) {
1289 index += self->extra->length;
1290 if (index < 0)
1291 index = 0;
1292 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001293 if (index > self->extra->length)
1294 index = self->extra->length;
1295
1296 if (element_resize(self, 1) < 0)
1297 return NULL;
1298
1299 for (i = self->extra->length; i > index; i--)
1300 self->extra->children[i] = self->extra->children[i-1];
1301
1302 Py_INCREF(element);
1303 self->extra->children[index] = element;
1304
1305 self->extra->length++;
1306
1307 Py_RETURN_NONE;
1308}
1309
1310static PyObject*
1311element_items(ElementObject* self, PyObject* args)
1312{
1313 if (!PyArg_ParseTuple(args, ":items"))
1314 return NULL;
1315
1316 if (!self->extra || self->extra->attrib == Py_None)
1317 return PyList_New(0);
1318
1319 return PyDict_Items(self->extra->attrib);
1320}
1321
1322static PyObject*
1323element_keys(ElementObject* self, PyObject* args)
1324{
1325 if (!PyArg_ParseTuple(args, ":keys"))
1326 return NULL;
1327
1328 if (!self->extra || self->extra->attrib == Py_None)
1329 return PyList_New(0);
1330
1331 return PyDict_Keys(self->extra->attrib);
1332}
1333
Martin v. Löwis18e16552006-02-15 17:27:45 +00001334static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001335element_length(ElementObject* self)
1336{
1337 if (!self->extra)
1338 return 0;
1339
1340 return self->extra->length;
1341}
1342
1343static PyObject*
1344element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1345{
1346 PyObject* elem;
1347
1348 PyObject* tag;
1349 PyObject* attrib;
1350 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1351 return NULL;
1352
1353 attrib = PyDict_Copy(attrib);
1354 if (!attrib)
1355 return NULL;
1356
Eli Bendersky092af1f2012-03-04 07:14:03 +02001357 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001358
1359 Py_DECREF(attrib);
1360
1361 return elem;
1362}
1363
1364static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001365element_remove(ElementObject* self, PyObject* args)
1366{
1367 int i;
1368
1369 PyObject* element;
1370 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1371 return NULL;
1372
1373 if (!self->extra) {
1374 /* element has no children, so raise exception */
1375 PyErr_SetString(
1376 PyExc_ValueError,
1377 "list.remove(x): x not in list"
1378 );
1379 return NULL;
1380 }
1381
1382 for (i = 0; i < self->extra->length; i++) {
1383 if (self->extra->children[i] == element)
1384 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001385 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001386 break;
1387 }
1388
1389 if (i == self->extra->length) {
1390 /* element is not in children, so raise exception */
1391 PyErr_SetString(
1392 PyExc_ValueError,
1393 "list.remove(x): x not in list"
1394 );
1395 return NULL;
1396 }
1397
1398 Py_DECREF(self->extra->children[i]);
1399
1400 self->extra->length--;
1401
1402 for (; i < self->extra->length; i++)
1403 self->extra->children[i] = self->extra->children[i+1];
1404
1405 Py_RETURN_NONE;
1406}
1407
1408static PyObject*
1409element_repr(ElementObject* self)
1410{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001411 if (self->tag)
1412 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1413 else
1414 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001415}
1416
1417static PyObject*
1418element_set(ElementObject* self, PyObject* args)
1419{
1420 PyObject* attrib;
1421
1422 PyObject* key;
1423 PyObject* value;
1424 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1425 return NULL;
1426
Victor Stinner5f0af232013-07-11 23:01:36 +02001427 if (!self->extra) {
1428 if (create_extra(self, NULL) < 0)
1429 return NULL;
1430 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001431
1432 attrib = element_get_attrib(self);
1433 if (!attrib)
1434 return NULL;
1435
1436 if (PyDict_SetItem(attrib, key, value) < 0)
1437 return NULL;
1438
1439 Py_RETURN_NONE;
1440}
1441
1442static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001443element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001444{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001445 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001446 int i;
1447 PyObject* old;
1448
1449 if (!self->extra || index < 0 || index >= self->extra->length) {
1450 PyErr_SetString(
1451 PyExc_IndexError,
1452 "child assignment index out of range");
1453 return -1;
1454 }
1455
1456 old = self->extra->children[index];
1457
1458 if (item) {
1459 Py_INCREF(item);
1460 self->extra->children[index] = item;
1461 } else {
1462 self->extra->length--;
1463 for (i = index; i < self->extra->length; i++)
1464 self->extra->children[i] = self->extra->children[i+1];
1465 }
1466
1467 Py_DECREF(old);
1468
1469 return 0;
1470}
1471
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001472static PyObject*
1473element_subscr(PyObject* self_, PyObject* item)
1474{
1475 ElementObject* self = (ElementObject*) self_;
1476
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001477 if (PyIndex_Check(item)) {
1478 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001479
1480 if (i == -1 && PyErr_Occurred()) {
1481 return NULL;
1482 }
1483 if (i < 0 && self->extra)
1484 i += self->extra->length;
1485 return element_getitem(self_, i);
1486 }
1487 else if (PySlice_Check(item)) {
1488 Py_ssize_t start, stop, step, slicelen, cur, i;
1489 PyObject* list;
1490
1491 if (!self->extra)
1492 return PyList_New(0);
1493
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001494 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001495 self->extra->length,
1496 &start, &stop, &step, &slicelen) < 0) {
1497 return NULL;
1498 }
1499
1500 if (slicelen <= 0)
1501 return PyList_New(0);
1502 else {
1503 list = PyList_New(slicelen);
1504 if (!list)
1505 return NULL;
1506
1507 for (cur = start, i = 0; i < slicelen;
1508 cur += step, i++) {
1509 PyObject* item = self->extra->children[cur];
1510 Py_INCREF(item);
1511 PyList_SET_ITEM(list, i, item);
1512 }
1513
1514 return list;
1515 }
1516 }
1517 else {
1518 PyErr_SetString(PyExc_TypeError,
1519 "element indices must be integers");
1520 return NULL;
1521 }
1522}
1523
1524static int
1525element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1526{
1527 ElementObject* self = (ElementObject*) self_;
1528
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001529 if (PyIndex_Check(item)) {
1530 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001531
1532 if (i == -1 && PyErr_Occurred()) {
1533 return -1;
1534 }
1535 if (i < 0 && self->extra)
1536 i += self->extra->length;
1537 return element_setitem(self_, i, value);
1538 }
1539 else if (PySlice_Check(item)) {
1540 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1541
1542 PyObject* recycle = NULL;
1543 PyObject* seq = NULL;
1544
Victor Stinner5f0af232013-07-11 23:01:36 +02001545 if (!self->extra) {
1546 if (create_extra(self, NULL) < 0)
1547 return -1;
1548 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001549
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001550 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001551 self->extra->length,
1552 &start, &stop, &step, &slicelen) < 0) {
1553 return -1;
1554 }
1555
Eli Bendersky865756a2012-03-09 13:38:15 +02001556 if (value == NULL) {
1557 /* Delete slice */
1558 size_t cur;
1559 Py_ssize_t i;
1560
1561 if (slicelen <= 0)
1562 return 0;
1563
1564 /* Since we're deleting, the direction of the range doesn't matter,
1565 * so for simplicity make it always ascending.
1566 */
1567 if (step < 0) {
1568 stop = start + 1;
1569 start = stop + step * (slicelen - 1) - 1;
1570 step = -step;
1571 }
1572
1573 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1574
1575 /* recycle is a list that will contain all the children
1576 * scheduled for removal.
1577 */
1578 if (!(recycle = PyList_New(slicelen))) {
1579 PyErr_NoMemory();
1580 return -1;
1581 }
1582
1583 /* This loop walks over all the children that have to be deleted,
1584 * with cur pointing at them. num_moved is the amount of children
1585 * until the next deleted child that have to be "shifted down" to
1586 * occupy the deleted's places.
1587 * Note that in the ith iteration, shifting is done i+i places down
1588 * because i children were already removed.
1589 */
1590 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1591 /* Compute how many children have to be moved, clipping at the
1592 * list end.
1593 */
1594 Py_ssize_t num_moved = step - 1;
1595 if (cur + step >= (size_t)self->extra->length) {
1596 num_moved = self->extra->length - cur - 1;
1597 }
1598
1599 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1600
1601 memmove(
1602 self->extra->children + cur - i,
1603 self->extra->children + cur + 1,
1604 num_moved * sizeof(PyObject *));
1605 }
1606
1607 /* Leftover "tail" after the last removed child */
1608 cur = start + (size_t)slicelen * step;
1609 if (cur < (size_t)self->extra->length) {
1610 memmove(
1611 self->extra->children + cur - slicelen,
1612 self->extra->children + cur,
1613 (self->extra->length - cur) * sizeof(PyObject *));
1614 }
1615
1616 self->extra->length -= slicelen;
1617
1618 /* Discard the recycle list with all the deleted sub-elements */
1619 Py_XDECREF(recycle);
1620 return 0;
1621 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001622 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001623 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001624 seq = PySequence_Fast(value, "");
1625 if (!seq) {
1626 PyErr_Format(
1627 PyExc_TypeError,
1628 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1629 );
1630 return -1;
1631 }
1632 newlen = PySequence_Size(seq);
1633 }
1634
1635 if (step != 1 && newlen != slicelen)
1636 {
1637 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001638 "attempt to assign sequence of size %zd "
1639 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001640 newlen, slicelen
1641 );
1642 return -1;
1643 }
1644
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001645 /* Resize before creating the recycle bin, to prevent refleaks. */
1646 if (newlen > slicelen) {
1647 if (element_resize(self, newlen - slicelen) < 0) {
1648 if (seq) {
1649 Py_DECREF(seq);
1650 }
1651 return -1;
1652 }
1653 }
1654
1655 if (slicelen > 0) {
1656 /* to avoid recursive calls to this method (via decref), move
1657 old items to the recycle bin here, and get rid of them when
1658 we're done modifying the element */
1659 recycle = PyList_New(slicelen);
1660 if (!recycle) {
1661 if (seq) {
1662 Py_DECREF(seq);
1663 }
1664 return -1;
1665 }
1666 for (cur = start, i = 0; i < slicelen;
1667 cur += step, i++)
1668 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1669 }
1670
1671 if (newlen < slicelen) {
1672 /* delete slice */
1673 for (i = stop; i < self->extra->length; i++)
1674 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1675 } else if (newlen > slicelen) {
1676 /* insert slice */
1677 for (i = self->extra->length-1; i >= stop; i--)
1678 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1679 }
1680
1681 /* replace the slice */
1682 for (cur = start, i = 0; i < newlen;
1683 cur += step, i++) {
1684 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1685 Py_INCREF(element);
1686 self->extra->children[cur] = element;
1687 }
1688
1689 self->extra->length += newlen - slicelen;
1690
1691 if (seq) {
1692 Py_DECREF(seq);
1693 }
1694
1695 /* discard the recycle bin, and everything in it */
1696 Py_XDECREF(recycle);
1697
1698 return 0;
1699 }
1700 else {
1701 PyErr_SetString(PyExc_TypeError,
1702 "element indices must be integers");
1703 return -1;
1704 }
1705}
1706
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001707static PyMethodDef element_methods[] = {
1708
Eli Bendersky0192ba32012-03-30 16:38:33 +03001709 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001710
Eli Benderskya8736902013-01-05 06:26:39 -08001711 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001712 {"set", (PyCFunction) element_set, METH_VARARGS},
1713
Eli Bendersky737b1732012-05-29 06:02:56 +03001714 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1715 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1716 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001717
1718 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001719 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001720 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1721 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1722
Eli Benderskya8736902013-01-05 06:26:39 -08001723 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001724 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001725 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001726
Eli Benderskya8736902013-01-05 06:26:39 -08001727 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001728 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1729
1730 {"items", (PyCFunction) element_items, METH_VARARGS},
1731 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1732
1733 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1734
1735 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1736 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001737 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001738 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1739 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001740
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001741 {NULL, NULL}
1742};
1743
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001744static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001745element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001746{
1747 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001748 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001749
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001750 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001751 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001752
Alexander Belopolskye239d232010-12-08 23:31:48 +00001753 if (name == NULL)
1754 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001755
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001756 /* handle common attributes first */
1757 if (strcmp(name, "tag") == 0) {
1758 res = self->tag;
1759 Py_INCREF(res);
1760 return res;
1761 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001762 res = element_get_text(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02001763 Py_XINCREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001764 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001765 }
1766
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001767 /* methods */
1768 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1769 if (res)
1770 return res;
1771
1772 /* less common attributes */
1773 if (strcmp(name, "tail") == 0) {
1774 PyErr_Clear();
1775 res = element_get_tail(self);
1776 } else if (strcmp(name, "attrib") == 0) {
1777 PyErr_Clear();
Victor Stinner5f0af232013-07-11 23:01:36 +02001778 if (!self->extra) {
1779 if (create_extra(self, NULL) < 0)
1780 return NULL;
1781 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001782 res = element_get_attrib(self);
1783 }
1784
1785 if (!res)
1786 return NULL;
1787
1788 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001789 return res;
1790}
1791
Eli Benderskyef9683b2013-05-18 07:52:34 -07001792static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001793element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001794{
Eli Benderskyb20df952012-05-20 06:33:29 +03001795 char *name = "";
1796 if (PyUnicode_Check(nameobj))
1797 name = _PyUnicode_AsString(nameobj);
Victor Stinner4d463432013-07-11 23:05:03 +02001798 if (name == NULL)
Eli Benderskyef9683b2013-05-18 07:52:34 -07001799 return -1;
Victor Stinner4d463432013-07-11 23:05:03 +02001800
1801 if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001802 Py_DECREF(self->tag);
1803 self->tag = value;
1804 Py_INCREF(self->tag);
1805 } else if (strcmp(name, "text") == 0) {
1806 Py_DECREF(JOIN_OBJ(self->text));
1807 self->text = value;
1808 Py_INCREF(self->text);
1809 } else if (strcmp(name, "tail") == 0) {
1810 Py_DECREF(JOIN_OBJ(self->tail));
1811 self->tail = value;
1812 Py_INCREF(self->tail);
1813 } else if (strcmp(name, "attrib") == 0) {
Victor Stinner5f0af232013-07-11 23:01:36 +02001814 if (!self->extra) {
1815 if (create_extra(self, NULL) < 0)
1816 return -1;
1817 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001818 Py_DECREF(self->extra->attrib);
1819 self->extra->attrib = value;
1820 Py_INCREF(self->extra->attrib);
1821 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001822 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001823 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001824 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001825 }
1826
Eli Benderskyef9683b2013-05-18 07:52:34 -07001827 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001828}
1829
1830static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001831 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001832 0, /* sq_concat */
1833 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001834 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001835 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001836 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001837 0,
1838};
1839
1840static PyMappingMethods element_as_mapping = {
1841 (lenfunc) element_length,
1842 (binaryfunc) element_subscr,
1843 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001844};
1845
Neal Norwitz227b5332006-03-22 09:28:35 +00001846static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001847 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001848 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001849 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001850 (destructor)element_dealloc, /* tp_dealloc */
1851 0, /* tp_print */
1852 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001853 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001854 0, /* tp_reserved */
1855 (reprfunc)element_repr, /* tp_repr */
1856 0, /* tp_as_number */
1857 &element_as_sequence, /* tp_as_sequence */
1858 &element_as_mapping, /* tp_as_mapping */
1859 0, /* tp_hash */
1860 0, /* tp_call */
1861 0, /* tp_str */
1862 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001863 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001864 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001865 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1866 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001867 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001868 (traverseproc)element_gc_traverse, /* tp_traverse */
1869 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001870 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001871 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001872 0, /* tp_iter */
1873 0, /* tp_iternext */
1874 element_methods, /* tp_methods */
1875 0, /* tp_members */
1876 0, /* tp_getset */
1877 0, /* tp_base */
1878 0, /* tp_dict */
1879 0, /* tp_descr_get */
1880 0, /* tp_descr_set */
1881 0, /* tp_dictoffset */
1882 (initproc)element_init, /* tp_init */
1883 PyType_GenericAlloc, /* tp_alloc */
1884 element_new, /* tp_new */
1885 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001886};
1887
Eli Bendersky64d11e62012-06-15 07:42:50 +03001888/******************************* Element iterator ****************************/
1889
1890/* ElementIterObject represents the iteration state over an XML element in
1891 * pre-order traversal. To keep track of which sub-element should be returned
1892 * next, a stack of parents is maintained. This is a standard stack-based
1893 * iterative pre-order traversal of a tree.
1894 * The stack is managed using a single-linked list starting at parent_stack.
1895 * Each stack node contains the saved parent to which we should return after
1896 * the current one is exhausted, and the next child to examine in that parent.
1897 */
1898typedef struct ParentLocator_t {
1899 ElementObject *parent;
1900 Py_ssize_t child_index;
1901 struct ParentLocator_t *next;
1902} ParentLocator;
1903
1904typedef struct {
1905 PyObject_HEAD
1906 ParentLocator *parent_stack;
1907 ElementObject *root_element;
1908 PyObject *sought_tag;
1909 int root_done;
1910 int gettext;
1911} ElementIterObject;
1912
1913
1914static void
1915elementiter_dealloc(ElementIterObject *it)
1916{
1917 ParentLocator *p = it->parent_stack;
1918 while (p) {
1919 ParentLocator *temp = p;
1920 Py_XDECREF(p->parent);
1921 p = p->next;
1922 PyObject_Free(temp);
1923 }
1924
1925 Py_XDECREF(it->sought_tag);
1926 Py_XDECREF(it->root_element);
1927
1928 PyObject_GC_UnTrack(it);
1929 PyObject_GC_Del(it);
1930}
1931
1932static int
1933elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1934{
1935 ParentLocator *p = it->parent_stack;
1936 while (p) {
1937 Py_VISIT(p->parent);
1938 p = p->next;
1939 }
1940
1941 Py_VISIT(it->root_element);
1942 Py_VISIT(it->sought_tag);
1943 return 0;
1944}
1945
1946/* Helper function for elementiter_next. Add a new parent to the parent stack.
1947 */
1948static ParentLocator *
1949parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1950{
1951 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1952 if (new_node) {
1953 new_node->parent = parent;
1954 Py_INCREF(parent);
1955 new_node->child_index = 0;
1956 new_node->next = stack;
1957 }
1958 return new_node;
1959}
1960
1961static PyObject *
1962elementiter_next(ElementIterObject *it)
1963{
1964 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08001965 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03001966 * A short note on gettext: this function serves both the iter() and
1967 * itertext() methods to avoid code duplication. However, there are a few
1968 * small differences in the way these iterations work. Namely:
1969 * - itertext() only yields text from nodes that have it, and continues
1970 * iterating when a node doesn't have text (so it doesn't return any
1971 * node like iter())
1972 * - itertext() also has to handle tail, after finishing with all the
1973 * children of a node.
1974 */
Eli Bendersky113da642012-06-15 07:52:49 +03001975 ElementObject *cur_parent;
1976 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001977
1978 while (1) {
1979 /* Handle the case reached in the beginning and end of iteration, where
1980 * the parent stack is empty. The root_done flag gives us indication
1981 * whether we've just started iterating (so root_done is 0), in which
1982 * case the root is returned. If root_done is 1 and we're here, the
1983 * iterator is exhausted.
1984 */
1985 if (!it->parent_stack->parent) {
1986 if (it->root_done) {
1987 PyErr_SetNone(PyExc_StopIteration);
1988 return NULL;
1989 } else {
1990 it->parent_stack = parent_stack_push_new(it->parent_stack,
1991 it->root_element);
1992 if (!it->parent_stack) {
1993 PyErr_NoMemory();
1994 return NULL;
1995 }
1996
1997 it->root_done = 1;
1998 if (it->sought_tag == Py_None ||
1999 PyObject_RichCompareBool(it->root_element->tag,
2000 it->sought_tag, Py_EQ) == 1) {
2001 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002002 PyObject *text = element_get_text(it->root_element);
2003 if (!text)
2004 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002005 if (PyObject_IsTrue(text)) {
2006 Py_INCREF(text);
2007 return text;
2008 }
2009 } else {
2010 Py_INCREF(it->root_element);
2011 return (PyObject *)it->root_element;
2012 }
2013 }
2014 }
2015 }
2016
2017 /* See if there are children left to traverse in the current parent. If
2018 * yes, visit the next child. If not, pop the stack and try again.
2019 */
Eli Bendersky113da642012-06-15 07:52:49 +03002020 cur_parent = it->parent_stack->parent;
2021 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002022 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2023 ElementObject *child = (ElementObject *)
2024 cur_parent->extra->children[child_index];
2025 it->parent_stack->child_index++;
2026 it->parent_stack = parent_stack_push_new(it->parent_stack,
2027 child);
2028 if (!it->parent_stack) {
2029 PyErr_NoMemory();
2030 return NULL;
2031 }
2032
2033 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002034 PyObject *text = element_get_text(child);
2035 if (!text)
2036 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002037 if (PyObject_IsTrue(text)) {
2038 Py_INCREF(text);
2039 return text;
2040 }
2041 } else if (it->sought_tag == Py_None ||
2042 PyObject_RichCompareBool(child->tag,
2043 it->sought_tag, Py_EQ) == 1) {
2044 Py_INCREF(child);
2045 return (PyObject *)child;
2046 }
2047 else
2048 continue;
2049 }
2050 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002051 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002052 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002053 if (it->gettext) {
2054 tail = element_get_tail(cur_parent);
2055 if (!tail)
2056 return NULL;
2057 }
2058 else
2059 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002060 Py_XDECREF(it->parent_stack->parent);
2061 PyObject_Free(it->parent_stack);
2062 it->parent_stack = next;
2063
2064 /* Note that extra condition on it->parent_stack->parent here;
2065 * this is because itertext() is supposed to only return *inner*
2066 * text, not text following the element it began iteration with.
2067 */
2068 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2069 Py_INCREF(tail);
2070 return tail;
2071 }
2072 }
2073 }
2074
2075 return NULL;
2076}
2077
2078
2079static PyTypeObject ElementIter_Type = {
2080 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002081 /* Using the module's name since the pure-Python implementation does not
2082 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002083 "_elementtree._element_iterator", /* tp_name */
2084 sizeof(ElementIterObject), /* tp_basicsize */
2085 0, /* tp_itemsize */
2086 /* methods */
2087 (destructor)elementiter_dealloc, /* tp_dealloc */
2088 0, /* tp_print */
2089 0, /* tp_getattr */
2090 0, /* tp_setattr */
2091 0, /* tp_reserved */
2092 0, /* tp_repr */
2093 0, /* tp_as_number */
2094 0, /* tp_as_sequence */
2095 0, /* tp_as_mapping */
2096 0, /* tp_hash */
2097 0, /* tp_call */
2098 0, /* tp_str */
2099 0, /* tp_getattro */
2100 0, /* tp_setattro */
2101 0, /* tp_as_buffer */
2102 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2103 0, /* tp_doc */
2104 (traverseproc)elementiter_traverse, /* tp_traverse */
2105 0, /* tp_clear */
2106 0, /* tp_richcompare */
2107 0, /* tp_weaklistoffset */
2108 PyObject_SelfIter, /* tp_iter */
2109 (iternextfunc)elementiter_next, /* tp_iternext */
2110 0, /* tp_methods */
2111 0, /* tp_members */
2112 0, /* tp_getset */
2113 0, /* tp_base */
2114 0, /* tp_dict */
2115 0, /* tp_descr_get */
2116 0, /* tp_descr_set */
2117 0, /* tp_dictoffset */
2118 0, /* tp_init */
2119 0, /* tp_alloc */
2120 0, /* tp_new */
2121};
2122
2123
2124static PyObject *
2125create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2126{
2127 ElementIterObject *it;
2128 PyObject *star = NULL;
2129
2130 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2131 if (!it)
2132 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002133
2134 if (PyUnicode_Check(tag))
2135 star = PyUnicode_FromString("*");
2136 else if (PyBytes_Check(tag))
2137 star = PyBytes_FromString("*");
2138
2139 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2140 tag = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002141 Py_XDECREF(star);
Victor Stinner4d463432013-07-11 23:05:03 +02002142
2143 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002144 it->sought_tag = tag;
2145 it->root_done = 0;
2146 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002147 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002148 it->root_element = self;
2149
Eli Bendersky64d11e62012-06-15 07:42:50 +03002150 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002151
2152 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2153 if (it->parent_stack == NULL) {
2154 Py_DECREF(it);
2155 PyErr_NoMemory();
2156 return NULL;
2157 }
2158 it->parent_stack->parent = NULL;
2159 it->parent_stack->child_index = 0;
2160 it->parent_stack->next = NULL;
2161
Eli Bendersky64d11e62012-06-15 07:42:50 +03002162 return (PyObject *)it;
2163}
2164
2165
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002166/* ==================================================================== */
2167/* the tree builder type */
2168
2169typedef struct {
2170 PyObject_HEAD
2171
Eli Bendersky58d548d2012-05-29 15:45:16 +03002172 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002173
Antoine Pitrouee329312012-10-04 19:53:29 +02002174 PyObject *this; /* current node */
2175 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002176
Eli Bendersky58d548d2012-05-29 15:45:16 +03002177 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002178
Eli Bendersky58d548d2012-05-29 15:45:16 +03002179 PyObject *stack; /* element stack */
2180 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002181
Eli Bendersky48d358b2012-05-30 17:57:50 +03002182 PyObject *element_factory;
2183
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002184 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002185 PyObject *events; /* list of events, or NULL if not collecting */
2186 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2187 PyObject *end_event_obj;
2188 PyObject *start_ns_event_obj;
2189 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002190} TreeBuilderObject;
2191
Christian Heimes90aa7642007-12-19 02:45:37 +00002192#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002193
2194/* -------------------------------------------------------------------- */
2195/* constructor and destructor */
2196
Eli Bendersky58d548d2012-05-29 15:45:16 +03002197static PyObject *
2198treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002199{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002200 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2201 if (t != NULL) {
2202 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002203
Eli Bendersky58d548d2012-05-29 15:45:16 +03002204 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002205 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002206 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002207 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002208
Eli Bendersky58d548d2012-05-29 15:45:16 +03002209 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002210 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002211 t->stack = PyList_New(20);
2212 if (!t->stack) {
2213 Py_DECREF(t->this);
2214 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002215 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002216 return NULL;
2217 }
2218 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002219
Eli Bendersky58d548d2012-05-29 15:45:16 +03002220 t->events = NULL;
2221 t->start_event_obj = t->end_event_obj = NULL;
2222 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2223 }
2224 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002225}
2226
Eli Bendersky58d548d2012-05-29 15:45:16 +03002227static int
2228treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002229{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002230 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002231 PyObject *element_factory = NULL;
2232 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002233 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002234
2235 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2236 &element_factory)) {
2237 return -1;
2238 }
2239
2240 if (element_factory) {
2241 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002242 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002243 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002244 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002245 }
2246
Eli Bendersky58d548d2012-05-29 15:45:16 +03002247 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002248}
2249
Eli Bendersky48d358b2012-05-30 17:57:50 +03002250static int
2251treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2252{
2253 Py_VISIT(self->root);
2254 Py_VISIT(self->this);
2255 Py_VISIT(self->last);
2256 Py_VISIT(self->data);
2257 Py_VISIT(self->stack);
2258 Py_VISIT(self->element_factory);
2259 return 0;
2260}
2261
2262static int
2263treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002264{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002265 Py_CLEAR(self->end_ns_event_obj);
2266 Py_CLEAR(self->start_ns_event_obj);
2267 Py_CLEAR(self->end_event_obj);
2268 Py_CLEAR(self->start_event_obj);
2269 Py_CLEAR(self->events);
2270 Py_CLEAR(self->stack);
2271 Py_CLEAR(self->data);
2272 Py_CLEAR(self->last);
2273 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002274 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002275 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002276 return 0;
2277}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002278
Eli Bendersky48d358b2012-05-30 17:57:50 +03002279static void
2280treebuilder_dealloc(TreeBuilderObject *self)
2281{
2282 PyObject_GC_UnTrack(self);
2283 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002284 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002285}
2286
2287/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002288/* helpers for handling of arbitrary element-like objects */
2289
2290static int
2291treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2292 PyObject **dest, _Py_Identifier *name)
2293{
2294 if (Element_CheckExact(element)) {
2295 Py_DECREF(JOIN_OBJ(*dest));
2296 *dest = JOIN_SET(data, PyList_CheckExact(data));
2297 return 0;
2298 }
2299 else {
2300 PyObject *joined = list_join(data);
2301 int r;
2302 if (joined == NULL)
2303 return -1;
2304 r = _PyObject_SetAttrId(element, name, joined);
2305 Py_DECREF(joined);
2306 return r;
2307 }
2308}
2309
2310/* These two functions steal a reference to data */
2311static int
2312treebuilder_set_element_text(PyObject *element, PyObject *data)
2313{
2314 _Py_IDENTIFIER(text);
2315 return treebuilder_set_element_text_or_tail(
2316 element, data, &((ElementObject *) element)->text, &PyId_text);
2317}
2318
2319static int
2320treebuilder_set_element_tail(PyObject *element, PyObject *data)
2321{
2322 _Py_IDENTIFIER(tail);
2323 return treebuilder_set_element_text_or_tail(
2324 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2325}
2326
2327static int
2328treebuilder_add_subelement(PyObject *element, PyObject *child)
2329{
2330 _Py_IDENTIFIER(append);
2331 if (Element_CheckExact(element)) {
2332 ElementObject *elem = (ElementObject *) element;
2333 return element_add_subelement(elem, child);
2334 }
2335 else {
2336 PyObject *res;
2337 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2338 if (res == NULL)
2339 return -1;
2340 Py_DECREF(res);
2341 return 0;
2342 }
2343}
2344
2345/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002346/* handlers */
2347
2348LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002349treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2350 PyObject* attrib)
2351{
2352 PyObject* node;
2353 PyObject* this;
2354
2355 if (self->data) {
2356 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002357 if (treebuilder_set_element_text(self->last, self->data))
2358 return NULL;
2359 }
2360 else {
2361 if (treebuilder_set_element_tail(self->last, self->data))
2362 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002363 }
2364 self->data = NULL;
2365 }
2366
Eli Bendersky08231a92013-05-18 15:47:16 -07002367 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002368 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2369 } else {
2370 node = create_new_element(tag, attrib);
2371 }
2372 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002373 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002374 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002375
Antoine Pitrouee329312012-10-04 19:53:29 +02002376 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002377
2378 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002379 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002380 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002381 } else {
2382 if (self->root) {
2383 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002384 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002385 "multiple elements on top level"
2386 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002387 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002388 }
2389 Py_INCREF(node);
2390 self->root = node;
2391 }
2392
2393 if (self->index < PyList_GET_SIZE(self->stack)) {
2394 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002395 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002396 Py_INCREF(this);
2397 } else {
2398 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002399 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002400 }
2401 self->index++;
2402
2403 Py_DECREF(this);
2404 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002405 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002406
2407 Py_DECREF(self->last);
2408 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002409 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002410
2411 if (self->start_event_obj) {
2412 PyObject* res;
2413 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002414 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002415 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002416 PyList_Append(self->events, res);
2417 Py_DECREF(res);
2418 } else
2419 PyErr_Clear(); /* FIXME: propagate error */
2420 }
2421
2422 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002423
2424 error:
2425 Py_DECREF(node);
2426 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002427}
2428
2429LOCAL(PyObject*)
2430treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2431{
2432 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002433 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002434 /* ignore calls to data before the first call to start */
2435 Py_RETURN_NONE;
2436 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002437 /* store the first item as is */
2438 Py_INCREF(data); self->data = data;
2439 } else {
2440 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002441 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2442 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002443 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002444 /* expat often generates single character data sections; handle
2445 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002446 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2447 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002448 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002449 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002450 } else if (PyList_CheckExact(self->data)) {
2451 if (PyList_Append(self->data, data) < 0)
2452 return NULL;
2453 } else {
2454 PyObject* list = PyList_New(2);
2455 if (!list)
2456 return NULL;
2457 PyList_SET_ITEM(list, 0, self->data);
2458 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2459 self->data = list;
2460 }
2461 }
2462
2463 Py_RETURN_NONE;
2464}
2465
2466LOCAL(PyObject*)
2467treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2468{
2469 PyObject* item;
2470
2471 if (self->data) {
2472 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002473 if (treebuilder_set_element_text(self->last, self->data))
2474 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002475 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002476 if (treebuilder_set_element_tail(self->last, self->data))
2477 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002478 }
2479 self->data = NULL;
2480 }
2481
2482 if (self->index == 0) {
2483 PyErr_SetString(
2484 PyExc_IndexError,
2485 "pop from empty stack"
2486 );
2487 return NULL;
2488 }
2489
2490 self->index--;
2491
2492 item = PyList_GET_ITEM(self->stack, self->index);
2493 Py_INCREF(item);
2494
2495 Py_DECREF(self->last);
2496
Antoine Pitrouee329312012-10-04 19:53:29 +02002497 self->last = self->this;
2498 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002499
2500 if (self->end_event_obj) {
2501 PyObject* res;
2502 PyObject* action = self->end_event_obj;
2503 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002504 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002505 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002506 PyList_Append(self->events, res);
2507 Py_DECREF(res);
2508 } else
2509 PyErr_Clear(); /* FIXME: propagate error */
2510 }
2511
2512 Py_INCREF(self->last);
2513 return (PyObject*) self->last;
2514}
2515
2516LOCAL(void)
2517treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002518 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519{
2520 PyObject* res;
2521 PyObject* action;
2522 PyObject* parcel;
2523
2524 if (!self->events)
2525 return;
2526
2527 if (start) {
2528 if (!self->start_ns_event_obj)
2529 return;
2530 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002531 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002532 if (!parcel)
2533 return;
2534 Py_INCREF(action);
2535 } else {
2536 if (!self->end_ns_event_obj)
2537 return;
2538 action = self->end_ns_event_obj;
2539 Py_INCREF(action);
2540 parcel = Py_None;
2541 Py_INCREF(parcel);
2542 }
2543
2544 res = PyTuple_New(2);
2545
2546 if (res) {
2547 PyTuple_SET_ITEM(res, 0, action);
2548 PyTuple_SET_ITEM(res, 1, parcel);
2549 PyList_Append(self->events, res);
2550 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002551 }
2552 else {
2553 Py_DECREF(action);
2554 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002555 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002556 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002557}
2558
2559/* -------------------------------------------------------------------- */
2560/* methods (in alphabetical order) */
2561
2562static PyObject*
2563treebuilder_data(TreeBuilderObject* self, PyObject* args)
2564{
2565 PyObject* data;
2566 if (!PyArg_ParseTuple(args, "O:data", &data))
2567 return NULL;
2568
2569 return treebuilder_handle_data(self, data);
2570}
2571
2572static PyObject*
2573treebuilder_end(TreeBuilderObject* self, PyObject* args)
2574{
2575 PyObject* tag;
2576 if (!PyArg_ParseTuple(args, "O:end", &tag))
2577 return NULL;
2578
2579 return treebuilder_handle_end(self, tag);
2580}
2581
2582LOCAL(PyObject*)
2583treebuilder_done(TreeBuilderObject* self)
2584{
2585 PyObject* res;
2586
2587 /* FIXME: check stack size? */
2588
2589 if (self->root)
2590 res = self->root;
2591 else
2592 res = Py_None;
2593
2594 Py_INCREF(res);
2595 return res;
2596}
2597
2598static PyObject*
2599treebuilder_close(TreeBuilderObject* self, PyObject* args)
2600{
2601 if (!PyArg_ParseTuple(args, ":close"))
2602 return NULL;
2603
2604 return treebuilder_done(self);
2605}
2606
2607static PyObject*
2608treebuilder_start(TreeBuilderObject* self, PyObject* args)
2609{
2610 PyObject* tag;
2611 PyObject* attrib = Py_None;
2612 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2613 return NULL;
2614
2615 return treebuilder_handle_start(self, tag, attrib);
2616}
2617
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002618static PyMethodDef treebuilder_methods[] = {
2619 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2620 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2621 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002622 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2623 {NULL, NULL}
2624};
2625
Neal Norwitz227b5332006-03-22 09:28:35 +00002626static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002627 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002628 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002629 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002630 (destructor)treebuilder_dealloc, /* tp_dealloc */
2631 0, /* tp_print */
2632 0, /* tp_getattr */
2633 0, /* tp_setattr */
2634 0, /* tp_reserved */
2635 0, /* tp_repr */
2636 0, /* tp_as_number */
2637 0, /* tp_as_sequence */
2638 0, /* tp_as_mapping */
2639 0, /* tp_hash */
2640 0, /* tp_call */
2641 0, /* tp_str */
2642 0, /* tp_getattro */
2643 0, /* tp_setattro */
2644 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002645 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2646 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002647 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002648 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2649 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002650 0, /* tp_richcompare */
2651 0, /* tp_weaklistoffset */
2652 0, /* tp_iter */
2653 0, /* tp_iternext */
2654 treebuilder_methods, /* tp_methods */
2655 0, /* tp_members */
2656 0, /* tp_getset */
2657 0, /* tp_base */
2658 0, /* tp_dict */
2659 0, /* tp_descr_get */
2660 0, /* tp_descr_set */
2661 0, /* tp_dictoffset */
2662 (initproc)treebuilder_init, /* tp_init */
2663 PyType_GenericAlloc, /* tp_alloc */
2664 treebuilder_new, /* tp_new */
2665 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002666};
2667
2668/* ==================================================================== */
2669/* the expat interface */
2670
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002671#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002672#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002673static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002674#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002675
Eli Bendersky52467b12012-06-01 07:13:08 +03002676static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2677 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2678
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002679typedef struct {
2680 PyObject_HEAD
2681
2682 XML_Parser parser;
2683
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002684 PyObject *target;
2685 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002686
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002687 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002688
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002689 PyObject *handle_start;
2690 PyObject *handle_data;
2691 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002692
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002693 PyObject *handle_comment;
2694 PyObject *handle_pi;
2695 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002696
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002697 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002698
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002699} XMLParserObject;
2700
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002701#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2702
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002703/* helpers */
2704
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002705LOCAL(PyObject*)
2706makeuniversal(XMLParserObject* self, const char* string)
2707{
2708 /* convert a UTF-8 tag/attribute name from the expat parser
2709 to a universal name string */
2710
Antoine Pitrouc1948842012-10-01 23:40:37 +02002711 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002712 PyObject* key;
2713 PyObject* value;
2714
2715 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002716 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002717 if (!key)
2718 return NULL;
2719
2720 value = PyDict_GetItem(self->names, key);
2721
2722 if (value) {
2723 Py_INCREF(value);
2724 } else {
2725 /* new name. convert to universal name, and decode as
2726 necessary */
2727
2728 PyObject* tag;
2729 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002730 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731
2732 /* look for namespace separator */
2733 for (i = 0; i < size; i++)
2734 if (string[i] == '}')
2735 break;
2736 if (i != size) {
2737 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002738 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002739 if (tag == NULL) {
2740 Py_DECREF(key);
2741 return NULL;
2742 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002743 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744 p[0] = '{';
2745 memcpy(p+1, string, size);
2746 size++;
2747 } else {
2748 /* plain name; use key as tag */
2749 Py_INCREF(key);
2750 tag = key;
2751 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002754 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002755 value = PyUnicode_DecodeUTF8(p, size, "strict");
2756 Py_DECREF(tag);
2757 if (!value) {
2758 Py_DECREF(key);
2759 return NULL;
2760 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002761
2762 /* add to names dictionary */
2763 if (PyDict_SetItem(self->names, key, value) < 0) {
2764 Py_DECREF(key);
2765 Py_DECREF(value);
2766 return NULL;
2767 }
2768 }
2769
2770 Py_DECREF(key);
2771 return value;
2772}
2773
Eli Bendersky5b77d812012-03-16 08:20:05 +02002774/* Set the ParseError exception with the given parameters.
2775 * If message is not NULL, it's used as the error string. Otherwise, the
2776 * message string is the default for the given error_code.
2777*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002778static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002779expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002780{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002781 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002782
Victor Stinner499dfcf2011-03-21 13:26:24 +01002783 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002784 message ? message : EXPAT(ErrorString)(error_code),
2785 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002786 if (errmsg == NULL)
2787 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002788
Victor Stinner499dfcf2011-03-21 13:26:24 +01002789 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2790 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002791 if (!error)
2792 return;
2793
Eli Bendersky5b77d812012-03-16 08:20:05 +02002794 /* Add code and position attributes */
2795 code = PyLong_FromLong((long)error_code);
2796 if (!code) {
2797 Py_DECREF(error);
2798 return;
2799 }
2800 if (PyObject_SetAttrString(error, "code", code) == -1) {
2801 Py_DECREF(error);
2802 Py_DECREF(code);
2803 return;
2804 }
2805 Py_DECREF(code);
2806
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002807 position = Py_BuildValue("(ii)", line, column);
2808 if (!position) {
2809 Py_DECREF(error);
2810 return;
2811 }
2812 if (PyObject_SetAttrString(error, "position", position) == -1) {
2813 Py_DECREF(error);
2814 Py_DECREF(position);
2815 return;
2816 }
2817 Py_DECREF(position);
2818
2819 PyErr_SetObject(elementtree_parseerror_obj, error);
2820 Py_DECREF(error);
2821}
2822
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002823/* -------------------------------------------------------------------- */
2824/* handlers */
2825
2826static void
2827expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2828 int data_len)
2829{
2830 PyObject* key;
2831 PyObject* value;
2832 PyObject* res;
2833
2834 if (data_len < 2 || data_in[0] != '&')
2835 return;
2836
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002837 if (PyErr_Occurred())
2838 return;
2839
Neal Norwitz0269b912007-08-08 06:56:02 +00002840 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002841 if (!key)
2842 return;
2843
2844 value = PyDict_GetItem(self->entity, key);
2845
2846 if (value) {
2847 if (TreeBuilder_CheckExact(self->target))
2848 res = treebuilder_handle_data(
2849 (TreeBuilderObject*) self->target, value
2850 );
2851 else if (self->handle_data)
2852 res = PyObject_CallFunction(self->handle_data, "O", value);
2853 else
2854 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002855 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002856 } else if (!PyErr_Occurred()) {
2857 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002858 char message[128] = "undefined entity ";
2859 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002860 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002861 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002862 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002863 EXPAT(GetErrorColumnNumber)(self->parser),
2864 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002865 );
2866 }
2867
2868 Py_DECREF(key);
2869}
2870
2871static void
2872expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2873 const XML_Char **attrib_in)
2874{
2875 PyObject* res;
2876 PyObject* tag;
2877 PyObject* attrib;
2878 int ok;
2879
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002880 if (PyErr_Occurred())
2881 return;
2882
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002883 /* tag name */
2884 tag = makeuniversal(self, tag_in);
2885 if (!tag)
2886 return; /* parser will look for errors */
2887
2888 /* attributes */
2889 if (attrib_in[0]) {
2890 attrib = PyDict_New();
2891 if (!attrib)
2892 return;
2893 while (attrib_in[0] && attrib_in[1]) {
2894 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002895 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002896 if (!key || !value) {
2897 Py_XDECREF(value);
2898 Py_XDECREF(key);
2899 Py_DECREF(attrib);
2900 return;
2901 }
2902 ok = PyDict_SetItem(attrib, key, value);
2903 Py_DECREF(value);
2904 Py_DECREF(key);
2905 if (ok < 0) {
2906 Py_DECREF(attrib);
2907 return;
2908 }
2909 attrib_in += 2;
2910 }
2911 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002912 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002913 attrib = PyDict_New();
2914 if (!attrib)
2915 return;
2916 }
2917
2918 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002919 /* shortcut */
2920 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2921 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002922 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002923 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002924 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002925 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002926 res = NULL;
2927
2928 Py_DECREF(tag);
2929 Py_DECREF(attrib);
2930
2931 Py_XDECREF(res);
2932}
2933
2934static void
2935expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2936 int data_len)
2937{
2938 PyObject* data;
2939 PyObject* res;
2940
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002941 if (PyErr_Occurred())
2942 return;
2943
Neal Norwitz0269b912007-08-08 06:56:02 +00002944 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002945 if (!data)
2946 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002947
2948 if (TreeBuilder_CheckExact(self->target))
2949 /* shortcut */
2950 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2951 else if (self->handle_data)
2952 res = PyObject_CallFunction(self->handle_data, "O", data);
2953 else
2954 res = NULL;
2955
2956 Py_DECREF(data);
2957
2958 Py_XDECREF(res);
2959}
2960
2961static void
2962expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2963{
2964 PyObject* tag;
2965 PyObject* res = NULL;
2966
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002967 if (PyErr_Occurred())
2968 return;
2969
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970 if (TreeBuilder_CheckExact(self->target))
2971 /* shortcut */
2972 /* the standard tree builder doesn't look at the end tag */
2973 res = treebuilder_handle_end(
2974 (TreeBuilderObject*) self->target, Py_None
2975 );
2976 else if (self->handle_end) {
2977 tag = makeuniversal(self, tag_in);
2978 if (tag) {
2979 res = PyObject_CallFunction(self->handle_end, "O", tag);
2980 Py_DECREF(tag);
2981 }
2982 }
2983
2984 Py_XDECREF(res);
2985}
2986
2987static void
2988expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2989 const XML_Char *uri)
2990{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002991 PyObject* sprefix = NULL;
2992 PyObject* suri = NULL;
2993
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002994 if (PyErr_Occurred())
2995 return;
2996
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002997 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2998 if (!suri)
2999 return;
3000
3001 if (prefix)
3002 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3003 else
3004 sprefix = PyUnicode_FromString("");
3005 if (!sprefix) {
3006 Py_DECREF(suri);
3007 return;
3008 }
3009
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003010 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003011 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003012 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003013
3014 Py_DECREF(sprefix);
3015 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016}
3017
3018static void
3019expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3020{
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003021 if (PyErr_Occurred())
3022 return;
3023
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003024 treebuilder_handle_namespace(
3025 (TreeBuilderObject*) self->target, 0, NULL, NULL
3026 );
3027}
3028
3029static void
3030expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3031{
3032 PyObject* comment;
3033 PyObject* res;
3034
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003035 if (PyErr_Occurred())
3036 return;
3037
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003038 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003039 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003040 if (comment) {
3041 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3042 Py_XDECREF(res);
3043 Py_DECREF(comment);
3044 }
3045 }
3046}
3047
Eli Bendersky45839902013-01-13 05:14:47 -08003048static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003049expat_start_doctype_handler(XMLParserObject *self,
3050 const XML_Char *doctype_name,
3051 const XML_Char *sysid,
3052 const XML_Char *pubid,
3053 int has_internal_subset)
3054{
3055 PyObject *self_pyobj = (PyObject *)self;
3056 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3057 PyObject *parser_doctype = NULL;
3058 PyObject *res = NULL;
3059
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003060 if (PyErr_Occurred())
3061 return;
3062
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003063 doctype_name_obj = makeuniversal(self, doctype_name);
3064 if (!doctype_name_obj)
3065 return;
3066
3067 if (sysid) {
3068 sysid_obj = makeuniversal(self, sysid);
3069 if (!sysid_obj) {
3070 Py_DECREF(doctype_name_obj);
3071 return;
3072 }
3073 } else {
3074 Py_INCREF(Py_None);
3075 sysid_obj = Py_None;
3076 }
3077
3078 if (pubid) {
3079 pubid_obj = makeuniversal(self, pubid);
3080 if (!pubid_obj) {
3081 Py_DECREF(doctype_name_obj);
3082 Py_DECREF(sysid_obj);
3083 return;
3084 }
3085 } else {
3086 Py_INCREF(Py_None);
3087 pubid_obj = Py_None;
3088 }
3089
3090 /* If the target has a handler for doctype, call it. */
3091 if (self->handle_doctype) {
3092 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3093 doctype_name_obj, pubid_obj, sysid_obj);
3094 Py_CLEAR(res);
3095 }
3096
3097 /* Now see if the parser itself has a doctype method. If yes and it's
3098 * a subclass, call it but warn about deprecation. If it's not a subclass
3099 * (i.e. vanilla XMLParser), do nothing.
3100 */
3101 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3102 if (parser_doctype) {
3103 if (!XMLParser_CheckExact(self_pyobj)) {
3104 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3105 "This method of XMLParser is deprecated. Define"
3106 " doctype() method on the TreeBuilder target.",
3107 1) < 0) {
3108 goto clear;
3109 }
3110 res = PyObject_CallFunction(parser_doctype, "OOO",
3111 doctype_name_obj, pubid_obj, sysid_obj);
3112 Py_CLEAR(res);
3113 }
3114 }
3115
3116clear:
3117 Py_XDECREF(parser_doctype);
3118 Py_DECREF(doctype_name_obj);
3119 Py_DECREF(pubid_obj);
3120 Py_DECREF(sysid_obj);
3121}
3122
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003123static void
3124expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3125 const XML_Char* data_in)
3126{
3127 PyObject* target;
3128 PyObject* data;
3129 PyObject* res;
3130
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003131 if (PyErr_Occurred())
3132 return;
3133
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003134 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003135 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3136 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003137 if (target && data) {
3138 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3139 Py_XDECREF(res);
3140 Py_DECREF(data);
3141 Py_DECREF(target);
3142 } else {
3143 Py_XDECREF(data);
3144 Py_XDECREF(target);
3145 }
3146 }
3147}
3148
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003149/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003150
Eli Bendersky52467b12012-06-01 07:13:08 +03003151static PyObject *
3152xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003153{
Eli Bendersky52467b12012-06-01 07:13:08 +03003154 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3155 if (self) {
3156 self->parser = NULL;
3157 self->target = self->entity = self->names = NULL;
3158 self->handle_start = self->handle_data = self->handle_end = NULL;
3159 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003160 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003161 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003162 return (PyObject *)self;
3163}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003164
Eli Bendersky52467b12012-06-01 07:13:08 +03003165static int
3166xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3167{
3168 XMLParserObject *self_xp = (XMLParserObject *)self;
3169 PyObject *target = NULL, *html = NULL;
3170 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003171 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003172
Eli Bendersky52467b12012-06-01 07:13:08 +03003173 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3174 &html, &target, &encoding)) {
3175 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003176 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003177
Eli Bendersky52467b12012-06-01 07:13:08 +03003178 self_xp->entity = PyDict_New();
3179 if (!self_xp->entity)
3180 return -1;
3181
3182 self_xp->names = PyDict_New();
3183 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003184 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003185 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003186 }
3187
Eli Bendersky52467b12012-06-01 07:13:08 +03003188 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3189 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003190 Py_CLEAR(self_xp->entity);
3191 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003192 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003193 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003194 }
3195
Eli Bendersky52467b12012-06-01 07:13:08 +03003196 if (target) {
3197 Py_INCREF(target);
3198 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003199 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003200 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003201 Py_CLEAR(self_xp->entity);
3202 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003203 EXPAT(ParserFree)(self_xp->parser);
3204 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003205 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003206 }
3207 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003208
Eli Bendersky52467b12012-06-01 07:13:08 +03003209 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3210 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3211 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3212 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3213 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3214 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003215 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003216
3217 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003218
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003219 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003220 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003221 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003222 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003223 (XML_StartElementHandler) expat_start_handler,
3224 (XML_EndElementHandler) expat_end_handler
3225 );
3226 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003227 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003228 (XML_DefaultHandler) expat_default_handler
3229 );
3230 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003231 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003232 (XML_CharacterDataHandler) expat_data_handler
3233 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003234 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003235 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003236 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003237 (XML_CommentHandler) expat_comment_handler
3238 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003239 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003240 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003241 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003242 (XML_ProcessingInstructionHandler) expat_pi_handler
3243 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003244 EXPAT(SetStartDoctypeDeclHandler)(
3245 self_xp->parser,
3246 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3247 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003248 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003249 self_xp->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003250 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003251 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003252
Eli Bendersky52467b12012-06-01 07:13:08 +03003253 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003254}
3255
Eli Bendersky52467b12012-06-01 07:13:08 +03003256static int
3257xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3258{
3259 Py_VISIT(self->handle_close);
3260 Py_VISIT(self->handle_pi);
3261 Py_VISIT(self->handle_comment);
3262 Py_VISIT(self->handle_end);
3263 Py_VISIT(self->handle_data);
3264 Py_VISIT(self->handle_start);
3265
3266 Py_VISIT(self->target);
3267 Py_VISIT(self->entity);
3268 Py_VISIT(self->names);
3269
3270 return 0;
3271}
3272
3273static int
3274xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003275{
3276 EXPAT(ParserFree)(self->parser);
3277
Antoine Pitrouc1948842012-10-01 23:40:37 +02003278 Py_CLEAR(self->handle_close);
3279 Py_CLEAR(self->handle_pi);
3280 Py_CLEAR(self->handle_comment);
3281 Py_CLEAR(self->handle_end);
3282 Py_CLEAR(self->handle_data);
3283 Py_CLEAR(self->handle_start);
3284 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003285
Antoine Pitrouc1948842012-10-01 23:40:37 +02003286 Py_CLEAR(self->target);
3287 Py_CLEAR(self->entity);
3288 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003289
Eli Bendersky52467b12012-06-01 07:13:08 +03003290 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003291}
3292
Eli Bendersky52467b12012-06-01 07:13:08 +03003293static void
3294xmlparser_dealloc(XMLParserObject* self)
3295{
3296 PyObject_GC_UnTrack(self);
3297 xmlparser_gc_clear(self);
3298 Py_TYPE(self)->tp_free((PyObject *)self);
3299}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003300
3301LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003302expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003303{
3304 int ok;
3305
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003306 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003307 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3308
3309 if (PyErr_Occurred())
3310 return NULL;
3311
3312 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003313 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003314 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003315 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003316 EXPAT(GetErrorColumnNumber)(self->parser),
3317 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003318 );
3319 return NULL;
3320 }
3321
3322 Py_RETURN_NONE;
3323}
3324
3325static PyObject*
3326xmlparser_close(XMLParserObject* self, PyObject* args)
3327{
3328 /* end feeding data to parser */
3329
3330 PyObject* res;
3331 if (!PyArg_ParseTuple(args, ":close"))
3332 return NULL;
3333
3334 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003335 if (!res)
3336 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003337
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003338 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003339 Py_DECREF(res);
3340 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003341 } if (self->handle_close) {
3342 Py_DECREF(res);
3343 return PyObject_CallFunction(self->handle_close, "");
3344 } else
3345 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346}
3347
3348static PyObject*
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003349xmlparser_feed(XMLParserObject* self, PyObject* arg)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003350{
3351 /* feed data to parser */
3352
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003353 if (PyUnicode_Check(arg)) {
3354 Py_ssize_t data_len;
3355 const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len);
3356 if (data == NULL)
3357 return NULL;
3358 if (data_len > INT_MAX) {
3359 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3360 return NULL;
3361 }
3362 /* Explicitly set UTF-8 encoding. Return code ignored. */
3363 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3364 return expat_parse(self, data, (int)data_len, 0);
3365 }
3366 else {
3367 Py_buffer view;
3368 PyObject *res;
3369 if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0)
3370 return NULL;
3371 if (view.len > INT_MAX) {
3372 PyBuffer_Release(&view);
3373 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3374 return NULL;
3375 }
3376 res = expat_parse(self, view.buf, (int)view.len, 0);
3377 PyBuffer_Release(&view);
3378 return res;
3379 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003380}
3381
3382static PyObject*
Eli Benderskya3699232013-05-19 18:47:23 -07003383xmlparser_parse_whole(XMLParserObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003384{
Eli Benderskya3699232013-05-19 18:47:23 -07003385 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003386 PyObject* reader;
3387 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003388 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003389 PyObject* res;
3390
3391 PyObject* fileobj;
3392 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3393 return NULL;
3394
3395 reader = PyObject_GetAttrString(fileobj, "read");
3396 if (!reader)
3397 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003398
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003399 /* read from open file object */
3400 for (;;) {
3401
3402 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3403
3404 if (!buffer) {
3405 /* read failed (e.g. due to KeyboardInterrupt) */
3406 Py_DECREF(reader);
3407 return NULL;
3408 }
3409
Eli Benderskyf996e772012-03-16 05:53:30 +02003410 if (PyUnicode_CheckExact(buffer)) {
3411 /* A unicode object is encoded into bytes using UTF-8 */
3412 if (PyUnicode_GET_SIZE(buffer) == 0) {
3413 Py_DECREF(buffer);
3414 break;
3415 }
3416 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003417 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003418 if (!temp) {
3419 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003420 Py_DECREF(reader);
3421 return NULL;
3422 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003423 buffer = temp;
3424 }
3425 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003426 Py_DECREF(buffer);
3427 break;
3428 }
3429
3430 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003431 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003432 );
3433
3434 Py_DECREF(buffer);
3435
3436 if (!res) {
3437 Py_DECREF(reader);
3438 return NULL;
3439 }
3440 Py_DECREF(res);
3441
3442 }
3443
3444 Py_DECREF(reader);
3445
3446 res = expat_parse(self, "", 0, 1);
3447
3448 if (res && TreeBuilder_CheckExact(self->target)) {
3449 Py_DECREF(res);
3450 return treebuilder_done((TreeBuilderObject*) self->target);
3451 }
3452
3453 return res;
3454}
3455
3456static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003457xmlparser_doctype(XMLParserObject *self, PyObject *args)
3458{
3459 Py_RETURN_NONE;
3460}
3461
3462static PyObject*
3463xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003464{
3465 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003466 Py_ssize_t i, seqlen;
3467 TreeBuilderObject *target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003468
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003469 PyObject *events_queue;
3470 PyObject *events_to_report = Py_None;
3471 PyObject *events_seq;
3472 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events_queue,
3473 &events_to_report))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003474 return NULL;
3475
3476 if (!TreeBuilder_CheckExact(self->target)) {
3477 PyErr_SetString(
3478 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003479 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003480 "targets"
3481 );
3482 return NULL;
3483 }
3484
3485 target = (TreeBuilderObject*) self->target;
3486
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003487 Py_INCREF(events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003488 Py_XDECREF(target->events);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003489 target->events = events_queue;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003490
3491 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003492 Py_CLEAR(target->start_event_obj);
3493 Py_CLEAR(target->end_event_obj);
3494 Py_CLEAR(target->start_ns_event_obj);
3495 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003496
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003497 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003498 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003499 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003500 Py_RETURN_NONE;
3501 }
3502
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003503 if (!(events_seq = PySequence_Fast(events_to_report,
3504 "events must be a sequence"))) {
3505 return NULL;
3506 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003507
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003508 seqlen = PySequence_Size(events_seq);
3509 for (i = 0; i < seqlen; ++i) {
3510 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3511 char *event_name = NULL;
3512 if (PyUnicode_Check(event_name_obj)) {
3513 event_name = _PyUnicode_AsString(event_name_obj);
3514 } else if (PyBytes_Check(event_name_obj)) {
3515 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003516 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003517
3518 if (event_name == NULL) {
3519 Py_DECREF(events_seq);
3520 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3521 return NULL;
3522 } else if (strcmp(event_name, "start") == 0) {
3523 Py_INCREF(event_name_obj);
3524 target->start_event_obj = event_name_obj;
3525 } else if (strcmp(event_name, "end") == 0) {
3526 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003527 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003528 target->end_event_obj = event_name_obj;
3529 } else if (strcmp(event_name, "start-ns") == 0) {
3530 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003531 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003532 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003533 EXPAT(SetNamespaceDeclHandler)(
3534 self->parser,
3535 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3536 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3537 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003538 } else if (strcmp(event_name, "end-ns") == 0) {
3539 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003540 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003541 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003542 EXPAT(SetNamespaceDeclHandler)(
3543 self->parser,
3544 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3545 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3546 );
3547 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003548 Py_DECREF(events_seq);
3549 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003550 return NULL;
3551 }
3552 }
3553
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003554 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003555 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003556}
3557
3558static PyMethodDef xmlparser_methods[] = {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003559 {"feed", (PyCFunction) xmlparser_feed, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003560 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
Eli Benderskya3699232013-05-19 18:47:23 -07003561 {"_parse_whole", (PyCFunction) xmlparser_parse_whole, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003562 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003563 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003564 {NULL, NULL}
3565};
3566
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003567static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003568xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003569{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003570 if (PyUnicode_Check(nameobj)) {
3571 PyObject* res;
3572 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3573 res = self->entity;
3574 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3575 res = self->target;
3576 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3577 return PyUnicode_FromFormat(
3578 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003579 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003580 }
3581 else
3582 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003583
Alexander Belopolskye239d232010-12-08 23:31:48 +00003584 Py_INCREF(res);
3585 return res;
3586 }
3587 generic:
3588 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003589}
3590
Neal Norwitz227b5332006-03-22 09:28:35 +00003591static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003592 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003593 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003594 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003595 (destructor)xmlparser_dealloc, /* tp_dealloc */
3596 0, /* tp_print */
3597 0, /* tp_getattr */
3598 0, /* tp_setattr */
3599 0, /* tp_reserved */
3600 0, /* tp_repr */
3601 0, /* tp_as_number */
3602 0, /* tp_as_sequence */
3603 0, /* tp_as_mapping */
3604 0, /* tp_hash */
3605 0, /* tp_call */
3606 0, /* tp_str */
3607 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3608 0, /* tp_setattro */
3609 0, /* tp_as_buffer */
3610 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3611 /* tp_flags */
3612 0, /* tp_doc */
3613 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3614 (inquiry)xmlparser_gc_clear, /* tp_clear */
3615 0, /* tp_richcompare */
3616 0, /* tp_weaklistoffset */
3617 0, /* tp_iter */
3618 0, /* tp_iternext */
3619 xmlparser_methods, /* tp_methods */
3620 0, /* tp_members */
3621 0, /* tp_getset */
3622 0, /* tp_base */
3623 0, /* tp_dict */
3624 0, /* tp_descr_get */
3625 0, /* tp_descr_set */
3626 0, /* tp_dictoffset */
3627 (initproc)xmlparser_init, /* tp_init */
3628 PyType_GenericAlloc, /* tp_alloc */
3629 xmlparser_new, /* tp_new */
3630 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003631};
3632
/* ==================================================================== */
/* python module interface */
3635
3636static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003637 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003638 {NULL, NULL}
3639};
3640
Martin v. Löwis1a214512008-06-11 05:26:20 +00003641
3642static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003643 PyModuleDef_HEAD_INIT,
3644 "_elementtree",
3645 NULL,
3646 -1,
3647 _functions,
3648 NULL,
3649 NULL,
3650 NULL,
3651 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003652};
3653
Neal Norwitzf6657e62006-12-28 04:47:50 +00003654PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003655PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003656{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003657 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003658
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003659 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003660 if (PyType_Ready(&ElementIter_Type) < 0)
3661 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003662 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003663 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003664 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003665 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003666 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003667 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003668
Martin v. Löwis1a214512008-06-11 05:26:20 +00003669 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003670 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003671 return NULL;
3672
Eli Bendersky828efde2012-04-05 05:40:58 +03003673 if (!(temp = PyImport_ImportModule("copy")))
3674 return NULL;
3675 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3676 Py_XDECREF(temp);
3677
3678 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3679 return NULL;
3680
Eli Bendersky20d41742012-06-01 09:48:37 +03003681 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003682 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3683 if (expat_capi) {
3684 /* check that it's usable */
3685 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3686 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3687 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3688 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003689 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003690 PyErr_SetString(PyExc_ImportError,
3691 "pyexpat version is incompatible");
3692 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003693 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003694 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003695 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003696 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003697
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003698 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003699 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003700 );
3701 Py_INCREF(elementtree_parseerror_obj);
3702 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3703
Eli Bendersky092af1f2012-03-04 07:14:03 +02003704 Py_INCREF((PyObject *)&Element_Type);
3705 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3706
Eli Bendersky58d548d2012-05-29 15:45:16 +03003707 Py_INCREF((PyObject *)&TreeBuilder_Type);
3708 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3709
Eli Bendersky52467b12012-06-01 07:13:08 +03003710 Py_INCREF((PyObject *)&XMLParser_Type);
3711 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003712
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003713 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003714}