blob: 28bdf1effebf3d58035222db29129904cae02896 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
14#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030015#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000017/* -------------------------------------------------------------------- */
18/* configuration */
19
/* Number of child slots embedded directly in the per-element "extra"
   block; an element with at most this many children needs no separate
   child-array allocation.

   For best performance, choose a value so that 80-90% of all nodes
   have no more than this number of children.  Set it to zero to
   minimize the size of the element structure itself (this only helps
   if you have lots of leaf nodes with attributes).

   Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
#define STATIC_CHILDREN 4
33
34/* -------------------------------------------------------------------- */
35
/* Allocation tracing hooks.  Flip the condition below to 1 to keep a
   running byte count and print it on every ALLOC/RELEASE; in the
   default configuration both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
46
/* compiler tweaks: LOCAL(type) introduces a file-private function
   returning 'type'; under MSVC it additionally requests inlining and
   the __fastcall convention */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
53
/* Macros used to store a 'join' flag in the lowest bit of a string
   object pointer.  Every use of text and tail as object pointers must
   go through JOIN_OBJ; see the comments in the ElementObject
   definition for more info.

   JOIN_OBJ(p)        the object pointer with the flag bit masked off
   JOIN_GET(p)        the flag bit (0 or 1)
   JOIN_SET(p, flag)  the pointer of p combined with the given flag */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & (Py_uintptr_t)1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061
Ronald Oussoren138d0802013-07-19 11:11:25 +020062/* Types defined by this extension */
63static PyTypeObject Element_Type;
64static PyTypeObject ElementIter_Type;
65static PyTypeObject TreeBuilder_Type;
66static PyTypeObject XMLParser_Type;
67
68
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000069/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000070static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000071static PyObject* elementtree_deepcopy_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000072static PyObject* elementpath_obj;
73
74/* helpers */
75
76LOCAL(PyObject*)
77deepcopy(PyObject* object, PyObject* memo)
78{
79 /* do a deep copy of the given object */
80
81 PyObject* args;
82 PyObject* result;
83
84 if (!elementtree_deepcopy_obj) {
85 PyErr_SetString(
86 PyExc_RuntimeError,
87 "deepcopy helper not found"
88 );
89 return NULL;
90 }
91
Antoine Pitrouc1948842012-10-01 23:40:37 +020092 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000093 if (!args)
94 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000095 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000096 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000097 return result;
98}
99
100LOCAL(PyObject*)
101list_join(PyObject* list)
102{
103 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000104 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000105 PyObject* result;
106
Antoine Pitrouc1948842012-10-01 23:40:37 +0200107 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000108 if (!joiner)
109 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200110 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000111 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200112 if (result)
113 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000114 return result;
115}
116
Eli Bendersky48d358b2012-05-30 17:57:50 +0300117/* Is the given object an empty dictionary?
118*/
119static int
120is_empty_dict(PyObject *obj)
121{
122 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
123}
124
125
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200127/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129typedef struct {
130
131 /* attributes (a dictionary object), or None if no attributes */
132 PyObject* attrib;
133
134 /* child elements */
135 int length; /* actual number of items */
136 int allocated; /* allocated items */
137
138 /* this either points to _children or to a malloced buffer */
139 PyObject* *children;
140
141 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100142
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143} ElementObjectExtra;
144
145typedef struct {
146 PyObject_HEAD
147
148 /* element tag (a string). */
149 PyObject* tag;
150
151 /* text before first child. note that this is a tagged pointer;
152 use JOIN_OBJ to get the object pointer. the join flag is used
153 to distinguish lists created by the tree builder from lists
154 assigned to the attribute by application code; the former
155 should be joined before being returned to the user, the latter
156 should be left intact. */
157 PyObject* text;
158
159 /* text after this element, in parent. note that this is a tagged
160 pointer; use JOIN_OBJ to get the object pointer. */
161 PyObject* tail;
162
163 ElementObjectExtra* extra;
164
Eli Benderskyebf37a22012-04-03 22:02:37 +0300165 PyObject *weakreflist; /* For tp_weaklistoffset */
166
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000167} ElementObject;
168
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000169
/* true if op's type is exactly Element_Type (subclasses do not match) */
#define Element_CheckExact(op) (&Element_Type == Py_TYPE(op))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000171
172/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200173/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000174
175LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200176create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000177{
178 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200179 if (!self->extra) {
180 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200182 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183
184 if (!attrib)
185 attrib = Py_None;
186
187 Py_INCREF(attrib);
188 self->extra->attrib = attrib;
189
190 self->extra->length = 0;
191 self->extra->allocated = STATIC_CHILDREN;
192 self->extra->children = self->extra->_children;
193
194 return 0;
195}
196
197LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200198dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000199{
Eli Bendersky08b85292012-04-04 15:55:07 +0300200 ElementObjectExtra *myextra;
201 int i;
202
Eli Benderskyebf37a22012-04-03 22:02:37 +0300203 if (!self->extra)
204 return;
205
206 /* Avoid DECREFs calling into this code again (cycles, etc.)
207 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300208 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300209 self->extra = NULL;
210
211 Py_DECREF(myextra->attrib);
212
Eli Benderskyebf37a22012-04-03 22:02:37 +0300213 for (i = 0; i < myextra->length; i++)
214 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215
Eli Benderskyebf37a22012-04-03 22:02:37 +0300216 if (myextra->children != myextra->_children)
217 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000218
Eli Benderskyebf37a22012-04-03 22:02:37 +0300219 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000220}
221
Eli Bendersky092af1f2012-03-04 07:14:03 +0200222/* Convenience internal function to create new Element objects with the given
223 * tag and attributes.
224*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000225LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200226create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227{
228 ElementObject* self;
229
Eli Bendersky0192ba32012-03-30 16:38:33 +0300230 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000231 if (self == NULL)
232 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000233 self->extra = NULL;
234
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235 Py_INCREF(tag);
236 self->tag = tag;
237
238 Py_INCREF(Py_None);
239 self->text = Py_None;
240
241 Py_INCREF(Py_None);
242 self->tail = Py_None;
243
Eli Benderskyebf37a22012-04-03 22:02:37 +0300244 self->weakreflist = NULL;
245
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200246 ALLOC(sizeof(ElementObject), "create element");
247 PyObject_GC_Track(self);
248
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200249 if (attrib != Py_None && !is_empty_dict(attrib)) {
250 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200251 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200252 return NULL;
253 }
254 }
255
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256 return (PyObject*) self;
257}
258
Eli Bendersky092af1f2012-03-04 07:14:03 +0200259static PyObject *
260element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
261{
262 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
263 if (e != NULL) {
264 Py_INCREF(Py_None);
265 e->tag = Py_None;
266
267 Py_INCREF(Py_None);
268 e->text = Py_None;
269
270 Py_INCREF(Py_None);
271 e->tail = Py_None;
272
273 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300274 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200275 }
276 return (PyObject *)e;
277}
278
Eli Bendersky737b1732012-05-29 06:02:56 +0300279/* Helper function for extracting the attrib dictionary from a keywords dict.
280 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800281 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300282 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700283 *
284 * Return a dictionary with the content of kwds merged into the content of
285 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300286 */
287static PyObject*
288get_attrib_from_keywords(PyObject *kwds)
289{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700290 PyObject *attrib_str = PyUnicode_FromString("attrib");
291 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300292
293 if (attrib) {
294 /* If attrib was found in kwds, copy its value and remove it from
295 * kwds
296 */
297 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700298 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300299 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
300 Py_TYPE(attrib)->tp_name);
301 return NULL;
302 }
303 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700304 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300305 } else {
306 attrib = PyDict_New();
307 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700308
309 Py_DECREF(attrib_str);
310
311 /* attrib can be NULL if PyDict_New failed */
312 if (attrib)
313 PyDict_Update(attrib, kwds);
Eli Bendersky737b1732012-05-29 06:02:56 +0300314 return attrib;
315}
316
Eli Bendersky092af1f2012-03-04 07:14:03 +0200317static int
318element_init(PyObject *self, PyObject *args, PyObject *kwds)
319{
320 PyObject *tag;
321 PyObject *tmp;
322 PyObject *attrib = NULL;
323 ElementObject *self_elem;
324
325 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
326 return -1;
327
Eli Bendersky737b1732012-05-29 06:02:56 +0300328 if (attrib) {
329 /* attrib passed as positional arg */
330 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200331 if (!attrib)
332 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300333 if (kwds) {
334 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200335 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 return -1;
337 }
338 }
339 } else if (kwds) {
340 /* have keywords args */
341 attrib = get_attrib_from_keywords(kwds);
342 if (!attrib)
343 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200344 }
345
346 self_elem = (ElementObject *)self;
347
Antoine Pitrouc1948842012-10-01 23:40:37 +0200348 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200349 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200350 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200351 return -1;
352 }
353 }
354
Eli Bendersky48d358b2012-05-30 17:57:50 +0300355 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200356 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200357
358 /* Replace the objects already pointed to by tag, text and tail. */
359 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200360 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200361 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200362 Py_DECREF(tmp);
363
364 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200365 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200366 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200367 Py_DECREF(JOIN_OBJ(tmp));
368
369 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200370 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200371 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200372 Py_DECREF(JOIN_OBJ(tmp));
373
374 return 0;
375}
376
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000377LOCAL(int)
378element_resize(ElementObject* self, int extra)
379{
380 int size;
381 PyObject* *children;
382
383 /* make sure self->children can hold the given number of extra
384 elements. set an exception and return -1 if allocation failed */
385
Victor Stinner5f0af232013-07-11 23:01:36 +0200386 if (!self->extra) {
387 if (create_extra(self, NULL) < 0)
388 return -1;
389 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000390
391 size = self->extra->length + extra;
392
393 if (size > self->extra->allocated) {
394 /* use Python 2.4's list growth strategy */
395 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000396 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100397 * which needs at least 4 bytes.
398 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000399 * be safe.
400 */
401 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000402 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000403 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100404 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000405 * false alarm always assume at least one child to be safe.
406 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000407 children = PyObject_Realloc(self->extra->children,
408 size * sizeof(PyObject*));
409 if (!children)
410 goto nomemory;
411 } else {
412 children = PyObject_Malloc(size * sizeof(PyObject*));
413 if (!children)
414 goto nomemory;
415 /* copy existing children from static area to malloc buffer */
416 memcpy(children, self->extra->children,
417 self->extra->length * sizeof(PyObject*));
418 }
419 self->extra->children = children;
420 self->extra->allocated = size;
421 }
422
423 return 0;
424
425 nomemory:
426 PyErr_NoMemory();
427 return -1;
428}
429
430LOCAL(int)
431element_add_subelement(ElementObject* self, PyObject* element)
432{
433 /* add a child element to a parent */
434
435 if (element_resize(self, 1) < 0)
436 return -1;
437
438 Py_INCREF(element);
439 self->extra->children[self->extra->length] = element;
440
441 self->extra->length++;
442
443 return 0;
444}
445
446LOCAL(PyObject*)
447element_get_attrib(ElementObject* self)
448{
449 /* return borrowed reference to attrib dictionary */
450 /* note: this function assumes that the extra section exists */
451
452 PyObject* res = self->extra->attrib;
453
454 if (res == Py_None) {
455 /* create missing dictionary */
456 res = PyDict_New();
457 if (!res)
458 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200459 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000460 self->extra->attrib = res;
461 }
462
463 return res;
464}
465
466LOCAL(PyObject*)
467element_get_text(ElementObject* self)
468{
469 /* return borrowed reference to text attribute */
470
471 PyObject* res = self->text;
472
473 if (JOIN_GET(res)) {
474 res = JOIN_OBJ(res);
475 if (PyList_CheckExact(res)) {
476 res = list_join(res);
477 if (!res)
478 return NULL;
479 self->text = res;
480 }
481 }
482
483 return res;
484}
485
486LOCAL(PyObject*)
487element_get_tail(ElementObject* self)
488{
489 /* return borrowed reference to text attribute */
490
491 PyObject* res = self->tail;
492
493 if (JOIN_GET(res)) {
494 res = JOIN_OBJ(res);
495 if (PyList_CheckExact(res)) {
496 res = list_join(res);
497 if (!res)
498 return NULL;
499 self->tail = res;
500 }
501 }
502
503 return res;
504}
505
506static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300507subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000508{
509 PyObject* elem;
510
511 ElementObject* parent;
512 PyObject* tag;
513 PyObject* attrib = NULL;
514 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
515 &Element_Type, &parent, &tag,
516 &PyDict_Type, &attrib))
517 return NULL;
518
Eli Bendersky737b1732012-05-29 06:02:56 +0300519 if (attrib) {
520 /* attrib passed as positional arg */
521 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000522 if (!attrib)
523 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300524 if (kwds) {
525 if (PyDict_Update(attrib, kwds) < 0) {
526 return NULL;
527 }
528 }
529 } else if (kwds) {
530 /* have keyword args */
531 attrib = get_attrib_from_keywords(kwds);
532 if (!attrib)
533 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000534 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300535 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000536 Py_INCREF(Py_None);
537 attrib = Py_None;
538 }
539
Eli Bendersky092af1f2012-03-04 07:14:03 +0200540 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000541 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200542 if (elem == NULL)
543 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000544
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000545 if (element_add_subelement(parent, elem) < 0) {
546 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000547 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000548 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000549
550 return elem;
551}
552
Eli Bendersky0192ba32012-03-30 16:38:33 +0300553static int
554element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
555{
556 Py_VISIT(self->tag);
557 Py_VISIT(JOIN_OBJ(self->text));
558 Py_VISIT(JOIN_OBJ(self->tail));
559
560 if (self->extra) {
561 int i;
562 Py_VISIT(self->extra->attrib);
563
564 for (i = 0; i < self->extra->length; ++i)
565 Py_VISIT(self->extra->children[i]);
566 }
567 return 0;
568}
569
570static int
571element_gc_clear(ElementObject *self)
572{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300573 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300574
575 /* The following is like Py_CLEAR for self->text and self->tail, but
576 * written explicitily because the real pointers hide behind access
577 * macros.
578 */
579 if (self->text) {
580 PyObject *tmp = JOIN_OBJ(self->text);
581 self->text = NULL;
582 Py_DECREF(tmp);
583 }
584
585 if (self->tail) {
586 PyObject *tmp = JOIN_OBJ(self->tail);
587 self->tail = NULL;
588 Py_DECREF(tmp);
589 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300590
591 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300592 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300593 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300594 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300595 return 0;
596}
597
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000598static void
599element_dealloc(ElementObject* self)
600{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300601 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300602
603 if (self->weakreflist != NULL)
604 PyObject_ClearWeakRefs((PyObject *) self);
605
Eli Bendersky0192ba32012-03-30 16:38:33 +0300606 /* element_gc_clear clears all references and deallocates extra
607 */
608 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000609
610 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200611 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000612}
613
614/* -------------------------------------------------------------------- */
615/* methods (in alphabetical order) */
616
617static PyObject*
618element_append(ElementObject* self, PyObject* args)
619{
620 PyObject* element;
621 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
622 return NULL;
623
624 if (element_add_subelement(self, element) < 0)
625 return NULL;
626
627 Py_RETURN_NONE;
628}
629
630static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300631element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000632{
633 if (!PyArg_ParseTuple(args, ":clear"))
634 return NULL;
635
Eli Benderskyebf37a22012-04-03 22:02:37 +0300636 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000637
638 Py_INCREF(Py_None);
639 Py_DECREF(JOIN_OBJ(self->text));
640 self->text = Py_None;
641
642 Py_INCREF(Py_None);
643 Py_DECREF(JOIN_OBJ(self->tail));
644 self->tail = Py_None;
645
646 Py_RETURN_NONE;
647}
648
649static PyObject*
650element_copy(ElementObject* self, PyObject* args)
651{
652 int i;
653 ElementObject* element;
654
655 if (!PyArg_ParseTuple(args, ":__copy__"))
656 return NULL;
657
Eli Bendersky092af1f2012-03-04 07:14:03 +0200658 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000659 self->tag, (self->extra) ? self->extra->attrib : Py_None
660 );
661 if (!element)
662 return NULL;
663
664 Py_DECREF(JOIN_OBJ(element->text));
665 element->text = self->text;
666 Py_INCREF(JOIN_OBJ(element->text));
667
668 Py_DECREF(JOIN_OBJ(element->tail));
669 element->tail = self->tail;
670 Py_INCREF(JOIN_OBJ(element->tail));
671
672 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100673
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000674 if (element_resize(element, self->extra->length) < 0) {
675 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000676 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000677 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000678
679 for (i = 0; i < self->extra->length; i++) {
680 Py_INCREF(self->extra->children[i]);
681 element->extra->children[i] = self->extra->children[i];
682 }
683
684 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100685
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000686 }
687
688 return (PyObject*) element;
689}
690
691static PyObject*
692element_deepcopy(ElementObject* self, PyObject* args)
693{
694 int i;
695 ElementObject* element;
696 PyObject* tag;
697 PyObject* attrib;
698 PyObject* text;
699 PyObject* tail;
700 PyObject* id;
701
702 PyObject* memo;
703 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
704 return NULL;
705
706 tag = deepcopy(self->tag, memo);
707 if (!tag)
708 return NULL;
709
710 if (self->extra) {
711 attrib = deepcopy(self->extra->attrib, memo);
712 if (!attrib) {
713 Py_DECREF(tag);
714 return NULL;
715 }
716 } else {
717 Py_INCREF(Py_None);
718 attrib = Py_None;
719 }
720
Eli Bendersky092af1f2012-03-04 07:14:03 +0200721 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722
723 Py_DECREF(tag);
724 Py_DECREF(attrib);
725
726 if (!element)
727 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100728
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000729 text = deepcopy(JOIN_OBJ(self->text), memo);
730 if (!text)
731 goto error;
732 Py_DECREF(element->text);
733 element->text = JOIN_SET(text, JOIN_GET(self->text));
734
735 tail = deepcopy(JOIN_OBJ(self->tail), memo);
736 if (!tail)
737 goto error;
738 Py_DECREF(element->tail);
739 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
740
741 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100742
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743 if (element_resize(element, self->extra->length) < 0)
744 goto error;
745
746 for (i = 0; i < self->extra->length; i++) {
747 PyObject* child = deepcopy(self->extra->children[i], memo);
748 if (!child) {
749 element->extra->length = i;
750 goto error;
751 }
752 element->extra->children[i] = child;
753 }
754
755 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100756
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000757 }
758
759 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200760 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000761 if (!id)
762 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000763
764 i = PyDict_SetItem(memo, id, (PyObject*) element);
765
766 Py_DECREF(id);
767
768 if (i < 0)
769 goto error;
770
771 return (PyObject*) element;
772
773 error:
774 Py_DECREF(element);
775 return NULL;
776}
777
Martin v. Löwisbce16662012-06-17 10:41:22 +0200778static PyObject*
779element_sizeof(PyObject* _self, PyObject* args)
780{
781 ElementObject *self = (ElementObject*)_self;
782 Py_ssize_t result = sizeof(ElementObject);
783 if (self->extra) {
784 result += sizeof(ElementObjectExtra);
785 if (self->extra->children != self->extra->_children)
786 result += sizeof(PyObject*) * self->extra->allocated;
787 }
788 return PyLong_FromSsize_t(result);
789}
790
/* Keys of the state dictionary exchanged by getstate/setstate. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
797
798/* __getstate__ returns a fabricated instance dict as in the pure-Python
799 * Element implementation, for interoperability/interchangeability. This
800 * makes the pure-Python implementation details an API, but (a) there aren't
801 * any unnecessary structures there; and (b) it buys compatibility with 3.2
802 * pickles. See issue #16076.
803 */
804static PyObject *
805element_getstate(ElementObject *self)
806{
807 int i, noattrib;
808 PyObject *instancedict = NULL, *children;
809
810 /* Build a list of children. */
811 children = PyList_New(self->extra ? self->extra->length : 0);
812 if (!children)
813 return NULL;
814 for (i = 0; i < PyList_GET_SIZE(children); i++) {
815 PyObject *child = self->extra->children[i];
816 Py_INCREF(child);
817 PyList_SET_ITEM(children, i, child);
818 }
819
820 /* Construct the state object. */
821 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
822 if (noattrib)
823 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
824 PICKLED_TAG, self->tag,
825 PICKLED_CHILDREN, children,
826 PICKLED_ATTRIB,
827 PICKLED_TEXT, self->text,
828 PICKLED_TAIL, self->tail);
829 else
830 instancedict = Py_BuildValue("{sOsOsOsOsO}",
831 PICKLED_TAG, self->tag,
832 PICKLED_CHILDREN, children,
833 PICKLED_ATTRIB, self->extra->attrib,
834 PICKLED_TEXT, self->text,
835 PICKLED_TAIL, self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800836 if (instancedict) {
837 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800838 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800839 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800840 else {
841 for (i = 0; i < PyList_GET_SIZE(children); i++)
842 Py_DECREF(PyList_GET_ITEM(children, i));
843 Py_DECREF(children);
844
845 return NULL;
846 }
847}
848
849static PyObject *
850element_setstate_from_attributes(ElementObject *self,
851 PyObject *tag,
852 PyObject *attrib,
853 PyObject *text,
854 PyObject *tail,
855 PyObject *children)
856{
857 Py_ssize_t i, nchildren;
858
859 if (!tag) {
860 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
861 return NULL;
862 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800863
864 Py_CLEAR(self->tag);
865 self->tag = tag;
866 Py_INCREF(self->tag);
867
868 Py_CLEAR(self->text);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800869 self->text = text ? text : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800870 Py_INCREF(self->text);
871
872 Py_CLEAR(self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800873 self->tail = tail ? tail : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800874 Py_INCREF(self->tail);
875
876 /* Handle ATTRIB and CHILDREN. */
877 if (!children && !attrib)
878 Py_RETURN_NONE;
879
880 /* Compute 'nchildren'. */
881 if (children) {
882 if (!PyList_Check(children)) {
883 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
884 return NULL;
885 }
886 nchildren = PyList_Size(children);
887 }
888 else {
889 nchildren = 0;
890 }
891
892 /* Allocate 'extra'. */
893 if (element_resize(self, nchildren)) {
894 return NULL;
895 }
896 assert(self->extra && self->extra->allocated >= nchildren);
897
898 /* Copy children */
899 for (i = 0; i < nchildren; i++) {
900 self->extra->children[i] = PyList_GET_ITEM(children, i);
901 Py_INCREF(self->extra->children[i]);
902 }
903
904 self->extra->length = nchildren;
905 self->extra->allocated = nchildren;
906
907 /* Stash attrib. */
908 if (attrib) {
909 Py_CLEAR(self->extra->attrib);
910 self->extra->attrib = attrib;
911 Py_INCREF(attrib);
912 }
913
914 Py_RETURN_NONE;
915}
916
917/* __setstate__ for Element instance from the Python implementation.
918 * 'state' should be the instance dict.
919 */
920static PyObject *
921element_setstate_from_Python(ElementObject *self, PyObject *state)
922{
923 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
924 PICKLED_TAIL, PICKLED_CHILDREN, 0};
925 PyObject *args;
926 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800927 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800928
Eli Bendersky698bdb22013-01-10 06:01:06 -0800929 tag = attrib = text = tail = children = NULL;
930 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800931 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800932 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800933
934 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
935 &attrib, &text, &tail, &children))
936 retval = element_setstate_from_attributes(self, tag, attrib, text,
937 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800938 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800939 retval = NULL;
940
941 Py_DECREF(args);
942 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800943}
944
945static PyObject *
946element_setstate(ElementObject *self, PyObject *state)
947{
948 if (!PyDict_CheckExact(state)) {
949 PyErr_Format(PyExc_TypeError,
950 "Don't know how to unpickle \"%.200R\" as an Element",
951 state);
952 return NULL;
953 }
954 else
955 return element_setstate_from_Python(self, state);
956}
957
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000958LOCAL(int)
959checkpath(PyObject* tag)
960{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000961 Py_ssize_t i;
962 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000963
964 /* check if a tag contains an xpath character */
965
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000966#define PATHCHAR(ch) \
967 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000968
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000969 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200970 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
971 void *data = PyUnicode_DATA(tag);
972 unsigned int kind = PyUnicode_KIND(tag);
973 for (i = 0; i < len; i++) {
974 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
975 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000976 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200977 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000978 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200979 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000980 return 1;
981 }
982 return 0;
983 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000984 if (PyBytes_Check(tag)) {
985 char *p = PyBytes_AS_STRING(tag);
986 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000987 if (p[i] == '{')
988 check = 0;
989 else if (p[i] == '}')
990 check = 1;
991 else if (check && PATHCHAR(p[i]))
992 return 1;
993 }
994 return 0;
995 }
996
997 return 1; /* unknown type; might be path expression */
998}
999
1000static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001001element_extend(ElementObject* self, PyObject* args)
1002{
1003 PyObject* seq;
1004 Py_ssize_t i, seqlen = 0;
1005
1006 PyObject* seq_in;
1007 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1008 return NULL;
1009
1010 seq = PySequence_Fast(seq_in, "");
1011 if (!seq) {
1012 PyErr_Format(
1013 PyExc_TypeError,
1014 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1015 );
1016 return NULL;
1017 }
1018
1019 seqlen = PySequence_Size(seq);
1020 for (i = 0; i < seqlen; i++) {
1021 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001022 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1023 Py_DECREF(seq);
1024 PyErr_Format(
1025 PyExc_TypeError,
1026 "expected an Element, not \"%.200s\"",
1027 Py_TYPE(element)->tp_name);
1028 return NULL;
1029 }
1030
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001031 if (element_add_subelement(self, element) < 0) {
1032 Py_DECREF(seq);
1033 return NULL;
1034 }
1035 }
1036
1037 Py_DECREF(seq);
1038
1039 Py_RETURN_NONE;
1040}
1041
1042static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001043element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001044{
1045 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001046 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001047 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001048 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001049
Eli Bendersky737b1732012-05-29 06:02:56 +03001050 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1051 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001052 return NULL;
1053
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001054 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001055 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001056 return _PyObject_CallMethodId(
1057 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001058 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001059 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001060
1061 if (!self->extra)
1062 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001063
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001064 for (i = 0; i < self->extra->length; i++) {
1065 PyObject* item = self->extra->children[i];
1066 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001067 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001068 Py_INCREF(item);
1069 return item;
1070 }
1071 }
1072
1073 Py_RETURN_NONE;
1074}
1075
1076static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001077element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001078{
1079 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001080 PyObject* tag;
1081 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001082 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001083 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001084 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001085
Eli Bendersky737b1732012-05-29 06:02:56 +03001086 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1087 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 return NULL;
1089
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001090 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001091 return _PyObject_CallMethodId(
1092 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001093 );
1094
1095 if (!self->extra) {
1096 Py_INCREF(default_value);
1097 return default_value;
1098 }
1099
1100 for (i = 0; i < self->extra->length; i++) {
1101 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +00001102 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
1103
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001104 PyObject* text = element_get_text(item);
1105 if (text == Py_None)
Eli Bendersky25771b32013-01-13 05:26:07 -08001106 return PyUnicode_New(0, 0);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001107 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001108 return text;
1109 }
1110 }
1111
1112 Py_INCREF(default_value);
1113 return default_value;
1114}
1115
1116static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001117element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001118{
1119 int i;
1120 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001121 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001122 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001123 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001124
Eli Bendersky737b1732012-05-29 06:02:56 +03001125 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1126 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001127 return NULL;
1128
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001129 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001130 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001131 return _PyObject_CallMethodId(
1132 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001133 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001134 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001135
1136 out = PyList_New(0);
1137 if (!out)
1138 return NULL;
1139
1140 if (!self->extra)
1141 return out;
1142
1143 for (i = 0; i < self->extra->length; i++) {
1144 PyObject* item = self->extra->children[i];
1145 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001146 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147 if (PyList_Append(out, item) < 0) {
1148 Py_DECREF(out);
1149 return NULL;
1150 }
1151 }
1152 }
1153
1154 return out;
1155}
1156
1157static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001158element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001159{
1160 PyObject* tag;
1161 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001162 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001163 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001164
Eli Bendersky737b1732012-05-29 06:02:56 +03001165 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1166 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001167 return NULL;
1168
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001169 return _PyObject_CallMethodId(
1170 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001171 );
1172}
1173
1174static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001175element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001176{
1177 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001178 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001179
1180 PyObject* key;
1181 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001182
1183 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1184 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001185 return NULL;
1186
1187 if (!self->extra || self->extra->attrib == Py_None)
1188 value = default_value;
1189 else {
1190 value = PyDict_GetItem(self->extra->attrib, key);
1191 if (!value)
1192 value = default_value;
1193 }
1194
1195 Py_INCREF(value);
1196 return value;
1197}
1198
1199static PyObject*
1200element_getchildren(ElementObject* self, PyObject* args)
1201{
1202 int i;
1203 PyObject* list;
1204
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001205 /* FIXME: report as deprecated? */
1206
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001207 if (!PyArg_ParseTuple(args, ":getchildren"))
1208 return NULL;
1209
1210 if (!self->extra)
1211 return PyList_New(0);
1212
1213 list = PyList_New(self->extra->length);
1214 if (!list)
1215 return NULL;
1216
1217 for (i = 0; i < self->extra->length; i++) {
1218 PyObject* item = self->extra->children[i];
1219 Py_INCREF(item);
1220 PyList_SET_ITEM(list, i, item);
1221 }
1222
1223 return list;
1224}
1225
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001226
Eli Bendersky64d11e62012-06-15 07:42:50 +03001227static PyObject *
1228create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1229
1230
1231static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001232element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001233{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001234 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001235 static char* kwlist[] = {"tag", 0};
1236
1237 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001238 return NULL;
1239
Eli Bendersky64d11e62012-06-15 07:42:50 +03001240 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001241}
1242
1243
1244static PyObject*
1245element_itertext(ElementObject* self, PyObject* args)
1246{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001247 if (!PyArg_ParseTuple(args, ":itertext"))
1248 return NULL;
1249
Eli Bendersky64d11e62012-06-15 07:42:50 +03001250 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001251}
1252
Eli Bendersky64d11e62012-06-15 07:42:50 +03001253
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001255element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001256{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001257 ElementObject* self = (ElementObject*) self_;
1258
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001259 if (!self->extra || index < 0 || index >= self->extra->length) {
1260 PyErr_SetString(
1261 PyExc_IndexError,
1262 "child index out of range"
1263 );
1264 return NULL;
1265 }
1266
1267 Py_INCREF(self->extra->children[index]);
1268 return self->extra->children[index];
1269}
1270
1271static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001272element_insert(ElementObject* self, PyObject* args)
1273{
1274 int i;
1275
1276 int index;
1277 PyObject* element;
1278 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1279 &Element_Type, &element))
1280 return NULL;
1281
Victor Stinner5f0af232013-07-11 23:01:36 +02001282 if (!self->extra) {
1283 if (create_extra(self, NULL) < 0)
1284 return NULL;
1285 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001286
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001287 if (index < 0) {
1288 index += self->extra->length;
1289 if (index < 0)
1290 index = 0;
1291 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001292 if (index > self->extra->length)
1293 index = self->extra->length;
1294
1295 if (element_resize(self, 1) < 0)
1296 return NULL;
1297
1298 for (i = self->extra->length; i > index; i--)
1299 self->extra->children[i] = self->extra->children[i-1];
1300
1301 Py_INCREF(element);
1302 self->extra->children[index] = element;
1303
1304 self->extra->length++;
1305
1306 Py_RETURN_NONE;
1307}
1308
1309static PyObject*
1310element_items(ElementObject* self, PyObject* args)
1311{
1312 if (!PyArg_ParseTuple(args, ":items"))
1313 return NULL;
1314
1315 if (!self->extra || self->extra->attrib == Py_None)
1316 return PyList_New(0);
1317
1318 return PyDict_Items(self->extra->attrib);
1319}
1320
1321static PyObject*
1322element_keys(ElementObject* self, PyObject* args)
1323{
1324 if (!PyArg_ParseTuple(args, ":keys"))
1325 return NULL;
1326
1327 if (!self->extra || self->extra->attrib == Py_None)
1328 return PyList_New(0);
1329
1330 return PyDict_Keys(self->extra->attrib);
1331}
1332
Martin v. Löwis18e16552006-02-15 17:27:45 +00001333static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001334element_length(ElementObject* self)
1335{
1336 if (!self->extra)
1337 return 0;
1338
1339 return self->extra->length;
1340}
1341
1342static PyObject*
1343element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1344{
1345 PyObject* elem;
1346
1347 PyObject* tag;
1348 PyObject* attrib;
1349 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1350 return NULL;
1351
1352 attrib = PyDict_Copy(attrib);
1353 if (!attrib)
1354 return NULL;
1355
Eli Bendersky092af1f2012-03-04 07:14:03 +02001356 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001357
1358 Py_DECREF(attrib);
1359
1360 return elem;
1361}
1362
1363static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001364element_remove(ElementObject* self, PyObject* args)
1365{
1366 int i;
1367
1368 PyObject* element;
1369 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1370 return NULL;
1371
1372 if (!self->extra) {
1373 /* element has no children, so raise exception */
1374 PyErr_SetString(
1375 PyExc_ValueError,
1376 "list.remove(x): x not in list"
1377 );
1378 return NULL;
1379 }
1380
1381 for (i = 0; i < self->extra->length; i++) {
1382 if (self->extra->children[i] == element)
1383 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001384 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001385 break;
1386 }
1387
1388 if (i == self->extra->length) {
1389 /* element is not in children, so raise exception */
1390 PyErr_SetString(
1391 PyExc_ValueError,
1392 "list.remove(x): x not in list"
1393 );
1394 return NULL;
1395 }
1396
1397 Py_DECREF(self->extra->children[i]);
1398
1399 self->extra->length--;
1400
1401 for (; i < self->extra->length; i++)
1402 self->extra->children[i] = self->extra->children[i+1];
1403
1404 Py_RETURN_NONE;
1405}
1406
1407static PyObject*
1408element_repr(ElementObject* self)
1409{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001410 if (self->tag)
1411 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1412 else
1413 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001414}
1415
1416static PyObject*
1417element_set(ElementObject* self, PyObject* args)
1418{
1419 PyObject* attrib;
1420
1421 PyObject* key;
1422 PyObject* value;
1423 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1424 return NULL;
1425
Victor Stinner5f0af232013-07-11 23:01:36 +02001426 if (!self->extra) {
1427 if (create_extra(self, NULL) < 0)
1428 return NULL;
1429 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001430
1431 attrib = element_get_attrib(self);
1432 if (!attrib)
1433 return NULL;
1434
1435 if (PyDict_SetItem(attrib, key, value) < 0)
1436 return NULL;
1437
1438 Py_RETURN_NONE;
1439}
1440
1441static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001442element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001443{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001444 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001445 int i;
1446 PyObject* old;
1447
1448 if (!self->extra || index < 0 || index >= self->extra->length) {
1449 PyErr_SetString(
1450 PyExc_IndexError,
1451 "child assignment index out of range");
1452 return -1;
1453 }
1454
1455 old = self->extra->children[index];
1456
1457 if (item) {
1458 Py_INCREF(item);
1459 self->extra->children[index] = item;
1460 } else {
1461 self->extra->length--;
1462 for (i = index; i < self->extra->length; i++)
1463 self->extra->children[i] = self->extra->children[i+1];
1464 }
1465
1466 Py_DECREF(old);
1467
1468 return 0;
1469}
1470
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001471static PyObject*
1472element_subscr(PyObject* self_, PyObject* item)
1473{
1474 ElementObject* self = (ElementObject*) self_;
1475
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001476 if (PyIndex_Check(item)) {
1477 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001478
1479 if (i == -1 && PyErr_Occurred()) {
1480 return NULL;
1481 }
1482 if (i < 0 && self->extra)
1483 i += self->extra->length;
1484 return element_getitem(self_, i);
1485 }
1486 else if (PySlice_Check(item)) {
1487 Py_ssize_t start, stop, step, slicelen, cur, i;
1488 PyObject* list;
1489
1490 if (!self->extra)
1491 return PyList_New(0);
1492
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001493 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001494 self->extra->length,
1495 &start, &stop, &step, &slicelen) < 0) {
1496 return NULL;
1497 }
1498
1499 if (slicelen <= 0)
1500 return PyList_New(0);
1501 else {
1502 list = PyList_New(slicelen);
1503 if (!list)
1504 return NULL;
1505
1506 for (cur = start, i = 0; i < slicelen;
1507 cur += step, i++) {
1508 PyObject* item = self->extra->children[cur];
1509 Py_INCREF(item);
1510 PyList_SET_ITEM(list, i, item);
1511 }
1512
1513 return list;
1514 }
1515 }
1516 else {
1517 PyErr_SetString(PyExc_TypeError,
1518 "element indices must be integers");
1519 return NULL;
1520 }
1521}
1522
1523static int
1524element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1525{
1526 ElementObject* self = (ElementObject*) self_;
1527
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001528 if (PyIndex_Check(item)) {
1529 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001530
1531 if (i == -1 && PyErr_Occurred()) {
1532 return -1;
1533 }
1534 if (i < 0 && self->extra)
1535 i += self->extra->length;
1536 return element_setitem(self_, i, value);
1537 }
1538 else if (PySlice_Check(item)) {
1539 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1540
1541 PyObject* recycle = NULL;
1542 PyObject* seq = NULL;
1543
Victor Stinner5f0af232013-07-11 23:01:36 +02001544 if (!self->extra) {
1545 if (create_extra(self, NULL) < 0)
1546 return -1;
1547 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001548
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001549 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001550 self->extra->length,
1551 &start, &stop, &step, &slicelen) < 0) {
1552 return -1;
1553 }
1554
Eli Bendersky865756a2012-03-09 13:38:15 +02001555 if (value == NULL) {
1556 /* Delete slice */
1557 size_t cur;
1558 Py_ssize_t i;
1559
1560 if (slicelen <= 0)
1561 return 0;
1562
1563 /* Since we're deleting, the direction of the range doesn't matter,
1564 * so for simplicity make it always ascending.
1565 */
1566 if (step < 0) {
1567 stop = start + 1;
1568 start = stop + step * (slicelen - 1) - 1;
1569 step = -step;
1570 }
1571
1572 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1573
1574 /* recycle is a list that will contain all the children
1575 * scheduled for removal.
1576 */
1577 if (!(recycle = PyList_New(slicelen))) {
1578 PyErr_NoMemory();
1579 return -1;
1580 }
1581
1582 /* This loop walks over all the children that have to be deleted,
1583 * with cur pointing at them. num_moved is the amount of children
1584 * until the next deleted child that have to be "shifted down" to
1585 * occupy the deleted's places.
1586 * Note that in the ith iteration, shifting is done i+i places down
1587 * because i children were already removed.
1588 */
1589 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1590 /* Compute how many children have to be moved, clipping at the
1591 * list end.
1592 */
1593 Py_ssize_t num_moved = step - 1;
1594 if (cur + step >= (size_t)self->extra->length) {
1595 num_moved = self->extra->length - cur - 1;
1596 }
1597
1598 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1599
1600 memmove(
1601 self->extra->children + cur - i,
1602 self->extra->children + cur + 1,
1603 num_moved * sizeof(PyObject *));
1604 }
1605
1606 /* Leftover "tail" after the last removed child */
1607 cur = start + (size_t)slicelen * step;
1608 if (cur < (size_t)self->extra->length) {
1609 memmove(
1610 self->extra->children + cur - slicelen,
1611 self->extra->children + cur,
1612 (self->extra->length - cur) * sizeof(PyObject *));
1613 }
1614
1615 self->extra->length -= slicelen;
1616
1617 /* Discard the recycle list with all the deleted sub-elements */
1618 Py_XDECREF(recycle);
1619 return 0;
1620 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001621 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001622 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001623 seq = PySequence_Fast(value, "");
1624 if (!seq) {
1625 PyErr_Format(
1626 PyExc_TypeError,
1627 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1628 );
1629 return -1;
1630 }
1631 newlen = PySequence_Size(seq);
1632 }
1633
1634 if (step != 1 && newlen != slicelen)
1635 {
1636 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001637 "attempt to assign sequence of size %zd "
1638 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001639 newlen, slicelen
1640 );
1641 return -1;
1642 }
1643
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001644 /* Resize before creating the recycle bin, to prevent refleaks. */
1645 if (newlen > slicelen) {
1646 if (element_resize(self, newlen - slicelen) < 0) {
1647 if (seq) {
1648 Py_DECREF(seq);
1649 }
1650 return -1;
1651 }
1652 }
1653
1654 if (slicelen > 0) {
1655 /* to avoid recursive calls to this method (via decref), move
1656 old items to the recycle bin here, and get rid of them when
1657 we're done modifying the element */
1658 recycle = PyList_New(slicelen);
1659 if (!recycle) {
1660 if (seq) {
1661 Py_DECREF(seq);
1662 }
1663 return -1;
1664 }
1665 for (cur = start, i = 0; i < slicelen;
1666 cur += step, i++)
1667 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1668 }
1669
1670 if (newlen < slicelen) {
1671 /* delete slice */
1672 for (i = stop; i < self->extra->length; i++)
1673 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1674 } else if (newlen > slicelen) {
1675 /* insert slice */
1676 for (i = self->extra->length-1; i >= stop; i--)
1677 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1678 }
1679
1680 /* replace the slice */
1681 for (cur = start, i = 0; i < newlen;
1682 cur += step, i++) {
1683 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1684 Py_INCREF(element);
1685 self->extra->children[cur] = element;
1686 }
1687
1688 self->extra->length += newlen - slicelen;
1689
1690 if (seq) {
1691 Py_DECREF(seq);
1692 }
1693
1694 /* discard the recycle bin, and everything in it */
1695 Py_XDECREF(recycle);
1696
1697 return 0;
1698 }
1699 else {
1700 PyErr_SetString(PyExc_TypeError,
1701 "element indices must be integers");
1702 return -1;
1703 }
1704}
1705
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001706static PyMethodDef element_methods[] = {
1707
Eli Bendersky0192ba32012-03-30 16:38:33 +03001708 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001709
Eli Benderskya8736902013-01-05 06:26:39 -08001710 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001711 {"set", (PyCFunction) element_set, METH_VARARGS},
1712
Eli Bendersky737b1732012-05-29 06:02:56 +03001713 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1714 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1715 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001716
1717 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001718 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001719 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1720 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1721
Eli Benderskya8736902013-01-05 06:26:39 -08001722 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001723 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001724 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001725
Eli Benderskya8736902013-01-05 06:26:39 -08001726 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001727 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1728
1729 {"items", (PyCFunction) element_items, METH_VARARGS},
1730 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1731
1732 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1733
1734 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1735 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001736 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001737 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1738 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001739
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001740 {NULL, NULL}
1741};
1742
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001743static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001744element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001745{
1746 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001747 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001748
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001749 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001750 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001751
Alexander Belopolskye239d232010-12-08 23:31:48 +00001752 if (name == NULL)
1753 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001754
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001755 /* handle common attributes first */
1756 if (strcmp(name, "tag") == 0) {
1757 res = self->tag;
1758 Py_INCREF(res);
1759 return res;
1760 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001761 res = element_get_text(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02001762 Py_XINCREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001763 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001764 }
1765
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001766 /* methods */
1767 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1768 if (res)
1769 return res;
1770
1771 /* less common attributes */
1772 if (strcmp(name, "tail") == 0) {
1773 PyErr_Clear();
1774 res = element_get_tail(self);
1775 } else if (strcmp(name, "attrib") == 0) {
1776 PyErr_Clear();
Victor Stinner5f0af232013-07-11 23:01:36 +02001777 if (!self->extra) {
1778 if (create_extra(self, NULL) < 0)
1779 return NULL;
1780 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001781 res = element_get_attrib(self);
1782 }
1783
1784 if (!res)
1785 return NULL;
1786
1787 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001788 return res;
1789}
1790
Eli Benderskyef9683b2013-05-18 07:52:34 -07001791static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001792element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001793{
Eli Benderskyb20df952012-05-20 06:33:29 +03001794 char *name = "";
1795 if (PyUnicode_Check(nameobj))
1796 name = _PyUnicode_AsString(nameobj);
Victor Stinner4d463432013-07-11 23:05:03 +02001797 if (name == NULL)
Eli Benderskyef9683b2013-05-18 07:52:34 -07001798 return -1;
Victor Stinner4d463432013-07-11 23:05:03 +02001799
1800 if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001801 Py_DECREF(self->tag);
1802 self->tag = value;
1803 Py_INCREF(self->tag);
1804 } else if (strcmp(name, "text") == 0) {
1805 Py_DECREF(JOIN_OBJ(self->text));
1806 self->text = value;
1807 Py_INCREF(self->text);
1808 } else if (strcmp(name, "tail") == 0) {
1809 Py_DECREF(JOIN_OBJ(self->tail));
1810 self->tail = value;
1811 Py_INCREF(self->tail);
1812 } else if (strcmp(name, "attrib") == 0) {
Victor Stinner5f0af232013-07-11 23:01:36 +02001813 if (!self->extra) {
1814 if (create_extra(self, NULL) < 0)
1815 return -1;
1816 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001817 Py_DECREF(self->extra->attrib);
1818 self->extra->attrib = value;
1819 Py_INCREF(self->extra->attrib);
1820 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001821 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001822 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001823 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001824 }
1825
Eli Benderskyef9683b2013-05-18 07:52:34 -07001826 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001827}
1828
/* Sequence protocol slots for Element (positional initializer). */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length,   /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem,            /* sq_item */
    0,                          /* was_sq_slice (unused) */
    element_setitem,            /* sq_ass_item */
    0,                          /* was_sq_ass_slice (unused) */
};
1838
/* Mapping protocol slots for Element (positional initializer). */
static PyMappingMethods element_as_mapping = {
    (lenfunc) element_length,               /* mp_length */
    (binaryfunc) element_subscr,            /* mp_subscript */
    (objobjargproc) element_ass_subscr,     /* mp_ass_subscript */
};
1844
/* Type object for Element.  Slots are filled positionally; each line is
   annotated with the slot it initializes.  The type is subclassable,
   takes part in cyclic GC and supports weak references. */
static PyTypeObject Element_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* tp_name, tp_basicsize, tp_itemsize */
    "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
    /* methods */
    (destructor)element_dealloc,                    /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_reserved */
    (reprfunc)element_repr,                         /* tp_repr */
    0,                                              /* tp_as_number */
    &element_as_sequence,                           /* tp_as_sequence */
    &element_as_mapping,                            /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    (getattrofunc)element_getattro,                 /* tp_getattro */
    (setattrofunc)element_setattro,                 /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)element_gc_traverse,              /* tp_traverse */
    (inquiry)element_gc_clear,                      /* tp_clear */
    0,                                              /* tp_richcompare */
    offsetof(ElementObject, weakreflist),           /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    element_methods,                                /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    (initproc)element_init,                         /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    element_new,                                    /* tp_new */
    0,                                              /* tp_free */
};
1886
Eli Bendersky64d11e62012-06-15 07:42:50 +03001887/******************************* Element iterator ****************************/
1888
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a single-linked list starting at parent_stack.
 * Each stack node contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
typedef struct ParentLocator_t {
    ElementObject *parent;          /* owned reference; NULL in the bottom
                                       (sentinel) node of the stack */
    Py_ssize_t child_index;         /* index of the next child to visit */
    struct ParentLocator_t *next;   /* node below this one, or NULL */
} ParentLocator;
1902
/* Iterator state shared by the iter() and itertext() implementations. */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* top of the linked parent stack */
    ElementObject *root_element;    /* element the iteration started from */
    PyObject *sought_tag;           /* tag to match; Py_None matches all */
    int root_done;                  /* nonzero once the root was handled */
    int gettext;                    /* nonzero: yield text instead of nodes */
} ElementIterObject;
1911
1912
1913static void
1914elementiter_dealloc(ElementIterObject *it)
1915{
1916 ParentLocator *p = it->parent_stack;
1917 while (p) {
1918 ParentLocator *temp = p;
1919 Py_XDECREF(p->parent);
1920 p = p->next;
1921 PyObject_Free(temp);
1922 }
1923
1924 Py_XDECREF(it->sought_tag);
1925 Py_XDECREF(it->root_element);
1926
1927 PyObject_GC_UnTrack(it);
1928 PyObject_GC_Del(it);
1929}
1930
1931static int
1932elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1933{
1934 ParentLocator *p = it->parent_stack;
1935 while (p) {
1936 Py_VISIT(p->parent);
1937 p = p->next;
1938 }
1939
1940 Py_VISIT(it->root_element);
1941 Py_VISIT(it->sought_tag);
1942 return 0;
1943}
1944
1945/* Helper function for elementiter_next. Add a new parent to the parent stack.
1946 */
1947static ParentLocator *
1948parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1949{
1950 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1951 if (new_node) {
1952 new_node->parent = parent;
1953 Py_INCREF(parent);
1954 new_node->child_index = 0;
1955 new_node->next = stack;
1956 }
1957 return new_node;
1958}
1959
1960static PyObject *
1961elementiter_next(ElementIterObject *it)
1962{
1963 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08001964 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03001965 * A short note on gettext: this function serves both the iter() and
1966 * itertext() methods to avoid code duplication. However, there are a few
1967 * small differences in the way these iterations work. Namely:
1968 * - itertext() only yields text from nodes that have it, and continues
1969 * iterating when a node doesn't have text (so it doesn't return any
1970 * node like iter())
1971 * - itertext() also has to handle tail, after finishing with all the
1972 * children of a node.
1973 */
Eli Bendersky113da642012-06-15 07:52:49 +03001974 ElementObject *cur_parent;
1975 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001976
1977 while (1) {
1978 /* Handle the case reached in the beginning and end of iteration, where
1979 * the parent stack is empty. The root_done flag gives us indication
1980 * whether we've just started iterating (so root_done is 0), in which
1981 * case the root is returned. If root_done is 1 and we're here, the
1982 * iterator is exhausted.
1983 */
1984 if (!it->parent_stack->parent) {
1985 if (it->root_done) {
1986 PyErr_SetNone(PyExc_StopIteration);
1987 return NULL;
1988 } else {
1989 it->parent_stack = parent_stack_push_new(it->parent_stack,
1990 it->root_element);
1991 if (!it->parent_stack) {
1992 PyErr_NoMemory();
1993 return NULL;
1994 }
1995
1996 it->root_done = 1;
1997 if (it->sought_tag == Py_None ||
1998 PyObject_RichCompareBool(it->root_element->tag,
1999 it->sought_tag, Py_EQ) == 1) {
2000 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002001 PyObject *text = element_get_text(it->root_element);
2002 if (!text)
2003 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002004 if (PyObject_IsTrue(text)) {
2005 Py_INCREF(text);
2006 return text;
2007 }
2008 } else {
2009 Py_INCREF(it->root_element);
2010 return (PyObject *)it->root_element;
2011 }
2012 }
2013 }
2014 }
2015
2016 /* See if there are children left to traverse in the current parent. If
2017 * yes, visit the next child. If not, pop the stack and try again.
2018 */
Eli Bendersky113da642012-06-15 07:52:49 +03002019 cur_parent = it->parent_stack->parent;
2020 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002021 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2022 ElementObject *child = (ElementObject *)
2023 cur_parent->extra->children[child_index];
2024 it->parent_stack->child_index++;
2025 it->parent_stack = parent_stack_push_new(it->parent_stack,
2026 child);
2027 if (!it->parent_stack) {
2028 PyErr_NoMemory();
2029 return NULL;
2030 }
2031
2032 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002033 PyObject *text = element_get_text(child);
2034 if (!text)
2035 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002036 if (PyObject_IsTrue(text)) {
2037 Py_INCREF(text);
2038 return text;
2039 }
2040 } else if (it->sought_tag == Py_None ||
2041 PyObject_RichCompareBool(child->tag,
2042 it->sought_tag, Py_EQ) == 1) {
2043 Py_INCREF(child);
2044 return (PyObject *)child;
2045 }
2046 else
2047 continue;
2048 }
2049 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002050 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002051 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002052 if (it->gettext) {
2053 tail = element_get_tail(cur_parent);
2054 if (!tail)
2055 return NULL;
2056 }
2057 else
2058 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002059 Py_XDECREF(it->parent_stack->parent);
2060 PyObject_Free(it->parent_stack);
2061 it->parent_stack = next;
2062
2063 /* Note that extra condition on it->parent_stack->parent here;
2064 * this is because itertext() is supposed to only return *inner*
2065 * text, not text following the element it began iteration with.
2066 */
2067 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2068 Py_INCREF(tail);
2069 return tail;
2070 }
2071 }
2072 }
2073
2074 return NULL;
2075}
2076
2077
/* Type object for the element iterator returned by create_elementiter().
   It has no tp_new, no methods and no tp_clear; it provides only the
   iterator protocol (tp_iter / tp_iternext) plus GC traversal. */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* Using the module's name since the pure-Python implementation does not
       have such a type. */
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
2121
2122
2123static PyObject *
2124create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2125{
2126 ElementIterObject *it;
2127 PyObject *star = NULL;
2128
2129 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2130 if (!it)
2131 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002132
2133 if (PyUnicode_Check(tag))
2134 star = PyUnicode_FromString("*");
2135 else if (PyBytes_Check(tag))
2136 star = PyBytes_FromString("*");
2137
2138 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2139 tag = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002140 Py_XDECREF(star);
Victor Stinner4d463432013-07-11 23:05:03 +02002141
2142 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002143 it->sought_tag = tag;
2144 it->root_done = 0;
2145 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002146 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002147 it->root_element = self;
2148
Eli Bendersky64d11e62012-06-15 07:42:50 +03002149 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002150
2151 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2152 if (it->parent_stack == NULL) {
2153 Py_DECREF(it);
2154 PyErr_NoMemory();
2155 return NULL;
2156 }
2157 it->parent_stack->parent = NULL;
2158 it->parent_stack->child_index = 0;
2159 it->parent_stack->next = NULL;
2160
Eli Bendersky64d11e62012-06-15 07:42:50 +03002161 return (PyObject *)it;
2162}
2163
2164
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002165/* ==================================================================== */
2166/* the tree builder type */
2167
/* State of a TreeBuilder: the tree under construction, the open-element
   stack, pending character data and the optional event-recording setup. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    PyObject *element_factory; /* callable used to create nodes; NULL or
                                  None selects the built-in Element */

    /* element tracing */
    PyObject *events; /* list of events, or NULL if not collecting */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;

/* True only for exact TreeBuilder instances (subclasses do not match). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002192
2193/* -------------------------------------------------------------------- */
2194/* constructor and destructor */
2195
Eli Bendersky58d548d2012-05-29 15:45:16 +03002196static PyObject *
2197treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002198{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002199 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2200 if (t != NULL) {
2201 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002202
Eli Bendersky58d548d2012-05-29 15:45:16 +03002203 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002204 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002205 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002206 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002207
Eli Bendersky58d548d2012-05-29 15:45:16 +03002208 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002209 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002210 t->stack = PyList_New(20);
2211 if (!t->stack) {
2212 Py_DECREF(t->this);
2213 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002214 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002215 return NULL;
2216 }
2217 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002218
Eli Bendersky58d548d2012-05-29 15:45:16 +03002219 t->events = NULL;
2220 t->start_event_obj = t->end_event_obj = NULL;
2221 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2222 }
2223 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002224}
2225
Eli Bendersky58d548d2012-05-29 15:45:16 +03002226static int
2227treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002228{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002229 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002230 PyObject *element_factory = NULL;
2231 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002232 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002233
2234 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2235 &element_factory)) {
2236 return -1;
2237 }
2238
2239 if (element_factory) {
2240 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002241 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002242 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002243 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002244 }
2245
Eli Bendersky58d548d2012-05-29 15:45:16 +03002246 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002247}
2248
Eli Bendersky48d358b2012-05-30 17:57:50 +03002249static int
2250treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2251{
2252 Py_VISIT(self->root);
2253 Py_VISIT(self->this);
2254 Py_VISIT(self->last);
2255 Py_VISIT(self->data);
2256 Py_VISIT(self->stack);
2257 Py_VISIT(self->element_factory);
2258 return 0;
2259}
2260
/* tp_clear for TreeBuilder: drops every object reference held by the
 * builder and resets each field to NULL.  Also used by the deallocator.
 * Always returns 0.
 */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->this);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002277
/* Deallocator for TreeBuilder: stop GC tracking first, release all held
 * references, then hand the memory back through the type's tp_free.
 */
static void
treebuilder_dealloc(TreeBuilderObject *self)
{
    PyObject_GC_UnTrack(self);
    treebuilder_gc_clear(self);
    Py_TYPE(self)->tp_free((PyObject *)self);
}
2285
2286/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002287/* helpers for handling of arbitrary element-like objects */
2288
2289static int
2290treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2291 PyObject **dest, _Py_Identifier *name)
2292{
2293 if (Element_CheckExact(element)) {
2294 Py_DECREF(JOIN_OBJ(*dest));
2295 *dest = JOIN_SET(data, PyList_CheckExact(data));
2296 return 0;
2297 }
2298 else {
2299 PyObject *joined = list_join(data);
2300 int r;
2301 if (joined == NULL)
2302 return -1;
2303 r = _PyObject_SetAttrId(element, name, joined);
2304 Py_DECREF(joined);
2305 return r;
2306 }
2307}
2308
2309/* These two functions steal a reference to data */
2310static int
2311treebuilder_set_element_text(PyObject *element, PyObject *data)
2312{
2313 _Py_IDENTIFIER(text);
2314 return treebuilder_set_element_text_or_tail(
2315 element, data, &((ElementObject *) element)->text, &PyId_text);
2316}
2317
2318static int
2319treebuilder_set_element_tail(PyObject *element, PyObject *data)
2320{
2321 _Py_IDENTIFIER(tail);
2322 return treebuilder_set_element_text_or_tail(
2323 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2324}
2325
2326static int
2327treebuilder_add_subelement(PyObject *element, PyObject *child)
2328{
2329 _Py_IDENTIFIER(append);
2330 if (Element_CheckExact(element)) {
2331 ElementObject *elem = (ElementObject *) element;
2332 return element_add_subelement(elem, child);
2333 }
2334 else {
2335 PyObject *res;
2336 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2337 if (res == NULL)
2338 return -1;
2339 Py_DECREF(res);
2340 return 0;
2341 }
2342}
2343
2344/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002345/* handlers */
2346
2347LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002348treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2349 PyObject* attrib)
2350{
2351 PyObject* node;
2352 PyObject* this;
2353
2354 if (self->data) {
2355 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002356 if (treebuilder_set_element_text(self->last, self->data))
2357 return NULL;
2358 }
2359 else {
2360 if (treebuilder_set_element_tail(self->last, self->data))
2361 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002362 }
2363 self->data = NULL;
2364 }
2365
Eli Bendersky08231a92013-05-18 15:47:16 -07002366 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002367 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2368 } else {
2369 node = create_new_element(tag, attrib);
2370 }
2371 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002372 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002373 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002374
Antoine Pitrouee329312012-10-04 19:53:29 +02002375 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002376
2377 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002378 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002379 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002380 } else {
2381 if (self->root) {
2382 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002383 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002384 "multiple elements on top level"
2385 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002386 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002387 }
2388 Py_INCREF(node);
2389 self->root = node;
2390 }
2391
2392 if (self->index < PyList_GET_SIZE(self->stack)) {
2393 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002394 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002395 Py_INCREF(this);
2396 } else {
2397 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002398 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002399 }
2400 self->index++;
2401
2402 Py_DECREF(this);
2403 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002404 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002405
2406 Py_DECREF(self->last);
2407 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002408 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002409
2410 if (self->start_event_obj) {
2411 PyObject* res;
2412 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002413 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002414 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002415 PyList_Append(self->events, res);
2416 Py_DECREF(res);
2417 } else
2418 PyErr_Clear(); /* FIXME: propagate error */
2419 }
2420
2421 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002422
2423 error:
2424 Py_DECREF(node);
2425 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002426}
2427
2428LOCAL(PyObject*)
2429treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2430{
2431 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002432 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002433 /* ignore calls to data before the first call to start */
2434 Py_RETURN_NONE;
2435 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002436 /* store the first item as is */
2437 Py_INCREF(data); self->data = data;
2438 } else {
2439 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002440 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2441 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002442 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002443 /* expat often generates single character data sections; handle
2444 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002445 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2446 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002447 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002448 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002449 } else if (PyList_CheckExact(self->data)) {
2450 if (PyList_Append(self->data, data) < 0)
2451 return NULL;
2452 } else {
2453 PyObject* list = PyList_New(2);
2454 if (!list)
2455 return NULL;
2456 PyList_SET_ITEM(list, 0, self->data);
2457 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2458 self->data = list;
2459 }
2460 }
2461
2462 Py_RETURN_NONE;
2463}
2464
2465LOCAL(PyObject*)
2466treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2467{
2468 PyObject* item;
2469
2470 if (self->data) {
2471 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002472 if (treebuilder_set_element_text(self->last, self->data))
2473 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002474 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002475 if (treebuilder_set_element_tail(self->last, self->data))
2476 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002477 }
2478 self->data = NULL;
2479 }
2480
2481 if (self->index == 0) {
2482 PyErr_SetString(
2483 PyExc_IndexError,
2484 "pop from empty stack"
2485 );
2486 return NULL;
2487 }
2488
2489 self->index--;
2490
2491 item = PyList_GET_ITEM(self->stack, self->index);
2492 Py_INCREF(item);
2493
2494 Py_DECREF(self->last);
2495
Antoine Pitrouee329312012-10-04 19:53:29 +02002496 self->last = self->this;
2497 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002498
2499 if (self->end_event_obj) {
2500 PyObject* res;
2501 PyObject* action = self->end_event_obj;
2502 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002503 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002504 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002505 PyList_Append(self->events, res);
2506 Py_DECREF(res);
2507 } else
2508 PyErr_Clear(); /* FIXME: propagate error */
2509 }
2510
2511 Py_INCREF(self->last);
2512 return (PyObject*) self->last;
2513}
2514
2515LOCAL(void)
2516treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002517 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002518{
2519 PyObject* res;
2520 PyObject* action;
2521 PyObject* parcel;
2522
2523 if (!self->events)
2524 return;
2525
2526 if (start) {
2527 if (!self->start_ns_event_obj)
2528 return;
2529 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002530 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002531 if (!parcel)
2532 return;
2533 Py_INCREF(action);
2534 } else {
2535 if (!self->end_ns_event_obj)
2536 return;
2537 action = self->end_ns_event_obj;
2538 Py_INCREF(action);
2539 parcel = Py_None;
2540 Py_INCREF(parcel);
2541 }
2542
2543 res = PyTuple_New(2);
2544
2545 if (res) {
2546 PyTuple_SET_ITEM(res, 0, action);
2547 PyTuple_SET_ITEM(res, 1, parcel);
2548 PyList_Append(self->events, res);
2549 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002550 }
2551 else {
2552 Py_DECREF(action);
2553 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002554 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002555 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002556}
2557
2558/* -------------------------------------------------------------------- */
2559/* methods (in alphabetical order) */
2560
2561static PyObject*
2562treebuilder_data(TreeBuilderObject* self, PyObject* args)
2563{
2564 PyObject* data;
2565 if (!PyArg_ParseTuple(args, "O:data", &data))
2566 return NULL;
2567
2568 return treebuilder_handle_data(self, data);
2569}
2570
2571static PyObject*
2572treebuilder_end(TreeBuilderObject* self, PyObject* args)
2573{
2574 PyObject* tag;
2575 if (!PyArg_ParseTuple(args, "O:end", &tag))
2576 return NULL;
2577
2578 return treebuilder_handle_end(self, tag);
2579}
2580
2581LOCAL(PyObject*)
2582treebuilder_done(TreeBuilderObject* self)
2583{
2584 PyObject* res;
2585
2586 /* FIXME: check stack size? */
2587
2588 if (self->root)
2589 res = self->root;
2590 else
2591 res = Py_None;
2592
2593 Py_INCREF(res);
2594 return res;
2595}
2596
2597static PyObject*
2598treebuilder_close(TreeBuilderObject* self, PyObject* args)
2599{
2600 if (!PyArg_ParseTuple(args, ":close"))
2601 return NULL;
2602
2603 return treebuilder_done(self);
2604}
2605
2606static PyObject*
2607treebuilder_start(TreeBuilderObject* self, PyObject* args)
2608{
2609 PyObject* tag;
2610 PyObject* attrib = Py_None;
2611 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2612 return NULL;
2613
2614 return treebuilder_handle_start(self, tag, attrib);
2615}
2616
/* Method table for TreeBuilder; every method takes positional arguments
   only.  The table ends with a NULL sentinel entry. */
static PyMethodDef treebuilder_methods[] = {
    {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
    {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
    {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
    {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
    {NULL, NULL}    /* sentinel */
};
2624
/* Type object for TreeBuilder.  Slots are filled positionally; each line is
   annotated with the slot it initializes.  The type is subclassable and
   takes part in cyclic GC. */
static PyTypeObject TreeBuilder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* tp_name, tp_basicsize, tp_itemsize */
    "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
    /* methods */
    (destructor)treebuilder_dealloc,                /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_reserved */
    0,                                              /* tp_repr */
    0,                                              /* tp_as_number */
    0,                                              /* tp_as_sequence */
    0,                                              /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    0,                                              /* tp_getattro */
    0,                                              /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)treebuilder_gc_traverse,          /* tp_traverse */
    (inquiry)treebuilder_gc_clear,                  /* tp_clear */
    0,                                              /* tp_richcompare */
    0,                                              /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    treebuilder_methods,                            /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    (initproc)treebuilder_init,                     /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    treebuilder_new,                                /* tp_new */
    0,                                              /* tp_free */
};
2666
2667/* ==================================================================== */
2668/* the expat interface */
2669
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002670#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002671#include "pyexpat.h"
/* Table of expat entry points exported by pyexpat; expat functions are
   reached through it via the EXPAT() macro.
   NOTE(review): presumably filled in at module initialization - the
   assignment is not visible in this part of the file. */
static struct PyExpat_CAPI *expat_capi;
/* EXPAT(func) names the member `func` of the capsule table. */
#define EXPAT(func) (expat_capi->func)

/* Memory handling suite that routes expat's malloc/realloc/free to the
   Python object allocator. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2677
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002678typedef struct {
2679 PyObject_HEAD
2680
2681 XML_Parser parser;
2682
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002683 PyObject *target;
2684 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002685
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002686 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002687
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002688 PyObject *handle_start;
2689 PyObject *handle_data;
2690 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002691
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002692 PyObject *handle_comment;
2693 PyObject *handle_pi;
2694 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002695
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002696 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002697
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002698} XMLParserObject;
2699
/* True only for instances whose type is exactly XMLParser_Type
   (subclass instances compare false). */
#define XMLParser_CheckExact(op) (&XMLParser_Type == Py_TYPE(op))
2701
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002702/* helpers */
2703
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002704LOCAL(PyObject*)
2705makeuniversal(XMLParserObject* self, const char* string)
2706{
2707 /* convert a UTF-8 tag/attribute name from the expat parser
2708 to a universal name string */
2709
Antoine Pitrouc1948842012-10-01 23:40:37 +02002710 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711 PyObject* key;
2712 PyObject* value;
2713
2714 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002715 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716 if (!key)
2717 return NULL;
2718
2719 value = PyDict_GetItem(self->names, key);
2720
2721 if (value) {
2722 Py_INCREF(value);
2723 } else {
2724 /* new name. convert to universal name, and decode as
2725 necessary */
2726
2727 PyObject* tag;
2728 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002729 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002730
2731 /* look for namespace separator */
2732 for (i = 0; i < size; i++)
2733 if (string[i] == '}')
2734 break;
2735 if (i != size) {
2736 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002737 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002738 if (tag == NULL) {
2739 Py_DECREF(key);
2740 return NULL;
2741 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002742 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002743 p[0] = '{';
2744 memcpy(p+1, string, size);
2745 size++;
2746 } else {
2747 /* plain name; use key as tag */
2748 Py_INCREF(key);
2749 tag = key;
2750 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002751
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002752 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002753 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002754 value = PyUnicode_DecodeUTF8(p, size, "strict");
2755 Py_DECREF(tag);
2756 if (!value) {
2757 Py_DECREF(key);
2758 return NULL;
2759 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002760
2761 /* add to names dictionary */
2762 if (PyDict_SetItem(self->names, key, value) < 0) {
2763 Py_DECREF(key);
2764 Py_DECREF(value);
2765 return NULL;
2766 }
2767 }
2768
2769 Py_DECREF(key);
2770 return value;
2771}
2772
Eli Bendersky5b77d812012-03-16 08:20:05 +02002773/* Set the ParseError exception with the given parameters.
2774 * If message is not NULL, it's used as the error string. Otherwise, the
2775 * message string is the default for the given error_code.
2776*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002777static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002778expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002779{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002780 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002781
Victor Stinner499dfcf2011-03-21 13:26:24 +01002782 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002783 message ? message : EXPAT(ErrorString)(error_code),
2784 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002785 if (errmsg == NULL)
2786 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002787
Victor Stinner499dfcf2011-03-21 13:26:24 +01002788 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2789 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002790 if (!error)
2791 return;
2792
Eli Bendersky5b77d812012-03-16 08:20:05 +02002793 /* Add code and position attributes */
2794 code = PyLong_FromLong((long)error_code);
2795 if (!code) {
2796 Py_DECREF(error);
2797 return;
2798 }
2799 if (PyObject_SetAttrString(error, "code", code) == -1) {
2800 Py_DECREF(error);
2801 Py_DECREF(code);
2802 return;
2803 }
2804 Py_DECREF(code);
2805
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002806 position = Py_BuildValue("(ii)", line, column);
2807 if (!position) {
2808 Py_DECREF(error);
2809 return;
2810 }
2811 if (PyObject_SetAttrString(error, "position", position) == -1) {
2812 Py_DECREF(error);
2813 Py_DECREF(position);
2814 return;
2815 }
2816 Py_DECREF(position);
2817
2818 PyErr_SetObject(elementtree_parseerror_obj, error);
2819 Py_DECREF(error);
2820}
2821
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002822/* -------------------------------------------------------------------- */
2823/* handlers */
2824
2825static void
2826expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2827 int data_len)
2828{
2829 PyObject* key;
2830 PyObject* value;
2831 PyObject* res;
2832
2833 if (data_len < 2 || data_in[0] != '&')
2834 return;
2835
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002836 if (PyErr_Occurred())
2837 return;
2838
Neal Norwitz0269b912007-08-08 06:56:02 +00002839 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002840 if (!key)
2841 return;
2842
2843 value = PyDict_GetItem(self->entity, key);
2844
2845 if (value) {
2846 if (TreeBuilder_CheckExact(self->target))
2847 res = treebuilder_handle_data(
2848 (TreeBuilderObject*) self->target, value
2849 );
2850 else if (self->handle_data)
2851 res = PyObject_CallFunction(self->handle_data, "O", value);
2852 else
2853 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002854 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002855 } else if (!PyErr_Occurred()) {
2856 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002857 char message[128] = "undefined entity ";
2858 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002859 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002860 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002861 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002862 EXPAT(GetErrorColumnNumber)(self->parser),
2863 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002864 );
2865 }
2866
2867 Py_DECREF(key);
2868}
2869
2870static void
2871expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2872 const XML_Char **attrib_in)
2873{
2874 PyObject* res;
2875 PyObject* tag;
2876 PyObject* attrib;
2877 int ok;
2878
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002879 if (PyErr_Occurred())
2880 return;
2881
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002882 /* tag name */
2883 tag = makeuniversal(self, tag_in);
2884 if (!tag)
2885 return; /* parser will look for errors */
2886
2887 /* attributes */
2888 if (attrib_in[0]) {
2889 attrib = PyDict_New();
2890 if (!attrib)
2891 return;
2892 while (attrib_in[0] && attrib_in[1]) {
2893 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002894 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002895 if (!key || !value) {
2896 Py_XDECREF(value);
2897 Py_XDECREF(key);
2898 Py_DECREF(attrib);
2899 return;
2900 }
2901 ok = PyDict_SetItem(attrib, key, value);
2902 Py_DECREF(value);
2903 Py_DECREF(key);
2904 if (ok < 0) {
2905 Py_DECREF(attrib);
2906 return;
2907 }
2908 attrib_in += 2;
2909 }
2910 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002911 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002912 attrib = PyDict_New();
2913 if (!attrib)
2914 return;
2915 }
2916
2917 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002918 /* shortcut */
2919 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2920 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002921 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002922 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002923 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002924 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002925 res = NULL;
2926
2927 Py_DECREF(tag);
2928 Py_DECREF(attrib);
2929
2930 Py_XDECREF(res);
2931}
2932
2933static void
2934expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2935 int data_len)
2936{
2937 PyObject* data;
2938 PyObject* res;
2939
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002940 if (PyErr_Occurred())
2941 return;
2942
Neal Norwitz0269b912007-08-08 06:56:02 +00002943 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002944 if (!data)
2945 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002946
2947 if (TreeBuilder_CheckExact(self->target))
2948 /* shortcut */
2949 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2950 else if (self->handle_data)
2951 res = PyObject_CallFunction(self->handle_data, "O", data);
2952 else
2953 res = NULL;
2954
2955 Py_DECREF(data);
2956
2957 Py_XDECREF(res);
2958}
2959
2960static void
2961expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2962{
2963 PyObject* tag;
2964 PyObject* res = NULL;
2965
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002966 if (PyErr_Occurred())
2967 return;
2968
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002969 if (TreeBuilder_CheckExact(self->target))
2970 /* shortcut */
2971 /* the standard tree builder doesn't look at the end tag */
2972 res = treebuilder_handle_end(
2973 (TreeBuilderObject*) self->target, Py_None
2974 );
2975 else if (self->handle_end) {
2976 tag = makeuniversal(self, tag_in);
2977 if (tag) {
2978 res = PyObject_CallFunction(self->handle_end, "O", tag);
2979 Py_DECREF(tag);
2980 }
2981 }
2982
2983 Py_XDECREF(res);
2984}
2985
2986static void
2987expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2988 const XML_Char *uri)
2989{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002990 PyObject* sprefix = NULL;
2991 PyObject* suri = NULL;
2992
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002993 if (PyErr_Occurred())
2994 return;
2995
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002996 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2997 if (!suri)
2998 return;
2999
3000 if (prefix)
3001 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3002 else
3003 sprefix = PyUnicode_FromString("");
3004 if (!sprefix) {
3005 Py_DECREF(suri);
3006 return;
3007 }
3008
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003009 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003010 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003011 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003012
3013 Py_DECREF(sprefix);
3014 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003015}
3016
3017static void
3018expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3019{
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003020 if (PyErr_Occurred())
3021 return;
3022
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003023 treebuilder_handle_namespace(
3024 (TreeBuilderObject*) self->target, 0, NULL, NULL
3025 );
3026}
3027
3028static void
3029expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3030{
3031 PyObject* comment;
3032 PyObject* res;
3033
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003034 if (PyErr_Occurred())
3035 return;
3036
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003037 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003038 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039 if (comment) {
3040 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3041 Py_XDECREF(res);
3042 Py_DECREF(comment);
3043 }
3044 }
3045}
3046
Eli Bendersky45839902013-01-13 05:14:47 -08003047static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003048expat_start_doctype_handler(XMLParserObject *self,
3049 const XML_Char *doctype_name,
3050 const XML_Char *sysid,
3051 const XML_Char *pubid,
3052 int has_internal_subset)
3053{
3054 PyObject *self_pyobj = (PyObject *)self;
3055 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3056 PyObject *parser_doctype = NULL;
3057 PyObject *res = NULL;
3058
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003059 if (PyErr_Occurred())
3060 return;
3061
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003062 doctype_name_obj = makeuniversal(self, doctype_name);
3063 if (!doctype_name_obj)
3064 return;
3065
3066 if (sysid) {
3067 sysid_obj = makeuniversal(self, sysid);
3068 if (!sysid_obj) {
3069 Py_DECREF(doctype_name_obj);
3070 return;
3071 }
3072 } else {
3073 Py_INCREF(Py_None);
3074 sysid_obj = Py_None;
3075 }
3076
3077 if (pubid) {
3078 pubid_obj = makeuniversal(self, pubid);
3079 if (!pubid_obj) {
3080 Py_DECREF(doctype_name_obj);
3081 Py_DECREF(sysid_obj);
3082 return;
3083 }
3084 } else {
3085 Py_INCREF(Py_None);
3086 pubid_obj = Py_None;
3087 }
3088
3089 /* If the target has a handler for doctype, call it. */
3090 if (self->handle_doctype) {
3091 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3092 doctype_name_obj, pubid_obj, sysid_obj);
3093 Py_CLEAR(res);
3094 }
3095
3096 /* Now see if the parser itself has a doctype method. If yes and it's
3097 * a subclass, call it but warn about deprecation. If it's not a subclass
3098 * (i.e. vanilla XMLParser), do nothing.
3099 */
3100 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3101 if (parser_doctype) {
3102 if (!XMLParser_CheckExact(self_pyobj)) {
3103 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3104 "This method of XMLParser is deprecated. Define"
3105 " doctype() method on the TreeBuilder target.",
3106 1) < 0) {
3107 goto clear;
3108 }
3109 res = PyObject_CallFunction(parser_doctype, "OOO",
3110 doctype_name_obj, pubid_obj, sysid_obj);
3111 Py_CLEAR(res);
3112 }
3113 }
3114
3115clear:
3116 Py_XDECREF(parser_doctype);
3117 Py_DECREF(doctype_name_obj);
3118 Py_DECREF(pubid_obj);
3119 Py_DECREF(sysid_obj);
3120}
3121
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003122static void
3123expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3124 const XML_Char* data_in)
3125{
3126 PyObject* target;
3127 PyObject* data;
3128 PyObject* res;
3129
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003130 if (PyErr_Occurred())
3131 return;
3132
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003133 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003134 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3135 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003136 if (target && data) {
3137 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3138 Py_XDECREF(res);
3139 Py_DECREF(data);
3140 Py_DECREF(target);
3141 } else {
3142 Py_XDECREF(data);
3143 Py_XDECREF(target);
3144 }
3145 }
3146}
3147
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003148/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003149
Eli Bendersky52467b12012-06-01 07:13:08 +03003150static PyObject *
3151xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003152{
Eli Bendersky52467b12012-06-01 07:13:08 +03003153 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3154 if (self) {
3155 self->parser = NULL;
3156 self->target = self->entity = self->names = NULL;
3157 self->handle_start = self->handle_data = self->handle_end = NULL;
3158 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003159 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003160 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003161 return (PyObject *)self;
3162}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003163
Eli Bendersky52467b12012-06-01 07:13:08 +03003164static int
3165xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3166{
3167 XMLParserObject *self_xp = (XMLParserObject *)self;
3168 PyObject *target = NULL, *html = NULL;
3169 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003170 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003171
Eli Bendersky52467b12012-06-01 07:13:08 +03003172 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3173 &html, &target, &encoding)) {
3174 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003175 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003176
Eli Bendersky52467b12012-06-01 07:13:08 +03003177 self_xp->entity = PyDict_New();
3178 if (!self_xp->entity)
3179 return -1;
3180
3181 self_xp->names = PyDict_New();
3182 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003183 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003184 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003185 }
3186
Eli Bendersky52467b12012-06-01 07:13:08 +03003187 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3188 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003189 Py_CLEAR(self_xp->entity);
3190 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003191 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003192 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003193 }
3194
Eli Bendersky52467b12012-06-01 07:13:08 +03003195 if (target) {
3196 Py_INCREF(target);
3197 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003198 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003199 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003200 Py_CLEAR(self_xp->entity);
3201 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003202 EXPAT(ParserFree)(self_xp->parser);
3203 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003204 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003205 }
3206 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003207
Eli Bendersky52467b12012-06-01 07:13:08 +03003208 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3209 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3210 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3211 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3212 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3213 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003214 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003215
3216 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003217
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003218 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003219 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003220 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003221 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003222 (XML_StartElementHandler) expat_start_handler,
3223 (XML_EndElementHandler) expat_end_handler
3224 );
3225 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003226 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003227 (XML_DefaultHandler) expat_default_handler
3228 );
3229 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003230 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003231 (XML_CharacterDataHandler) expat_data_handler
3232 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003233 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003234 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003235 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003236 (XML_CommentHandler) expat_comment_handler
3237 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003238 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003239 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003240 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003241 (XML_ProcessingInstructionHandler) expat_pi_handler
3242 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003243 EXPAT(SetStartDoctypeDeclHandler)(
3244 self_xp->parser,
3245 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3246 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003247 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003248 self_xp->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003249 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003250 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003251
Eli Bendersky52467b12012-06-01 07:13:08 +03003252 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003253}
3254
Eli Bendersky52467b12012-06-01 07:13:08 +03003255static int
3256xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3257{
3258 Py_VISIT(self->handle_close);
3259 Py_VISIT(self->handle_pi);
3260 Py_VISIT(self->handle_comment);
3261 Py_VISIT(self->handle_end);
3262 Py_VISIT(self->handle_data);
3263 Py_VISIT(self->handle_start);
3264
3265 Py_VISIT(self->target);
3266 Py_VISIT(self->entity);
3267 Py_VISIT(self->names);
3268
3269 return 0;
3270}
3271
3272static int
3273xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274{
3275 EXPAT(ParserFree)(self->parser);
3276
Antoine Pitrouc1948842012-10-01 23:40:37 +02003277 Py_CLEAR(self->handle_close);
3278 Py_CLEAR(self->handle_pi);
3279 Py_CLEAR(self->handle_comment);
3280 Py_CLEAR(self->handle_end);
3281 Py_CLEAR(self->handle_data);
3282 Py_CLEAR(self->handle_start);
3283 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284
Antoine Pitrouc1948842012-10-01 23:40:37 +02003285 Py_CLEAR(self->target);
3286 Py_CLEAR(self->entity);
3287 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288
Eli Bendersky52467b12012-06-01 07:13:08 +03003289 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290}
3291
Eli Bendersky52467b12012-06-01 07:13:08 +03003292static void
3293xmlparser_dealloc(XMLParserObject* self)
3294{
3295 PyObject_GC_UnTrack(self);
3296 xmlparser_gc_clear(self);
3297 Py_TYPE(self)->tp_free((PyObject *)self);
3298}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003299
3300LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003301expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003302{
3303 int ok;
3304
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003305 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003306 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3307
3308 if (PyErr_Occurred())
3309 return NULL;
3310
3311 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003312 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003313 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003314 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003315 EXPAT(GetErrorColumnNumber)(self->parser),
3316 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003317 );
3318 return NULL;
3319 }
3320
3321 Py_RETURN_NONE;
3322}
3323
3324static PyObject*
3325xmlparser_close(XMLParserObject* self, PyObject* args)
3326{
3327 /* end feeding data to parser */
3328
3329 PyObject* res;
3330 if (!PyArg_ParseTuple(args, ":close"))
3331 return NULL;
3332
3333 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003334 if (!res)
3335 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003336
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003337 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003338 Py_DECREF(res);
3339 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003340 } if (self->handle_close) {
3341 Py_DECREF(res);
3342 return PyObject_CallFunction(self->handle_close, "");
3343 } else
3344 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003345}
3346
3347static PyObject*
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003348xmlparser_feed(XMLParserObject* self, PyObject* arg)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003349{
3350 /* feed data to parser */
3351
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003352 if (PyUnicode_Check(arg)) {
3353 Py_ssize_t data_len;
3354 const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len);
3355 if (data == NULL)
3356 return NULL;
3357 if (data_len > INT_MAX) {
3358 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3359 return NULL;
3360 }
3361 /* Explicitly set UTF-8 encoding. Return code ignored. */
3362 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3363 return expat_parse(self, data, (int)data_len, 0);
3364 }
3365 else {
3366 Py_buffer view;
3367 PyObject *res;
3368 if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0)
3369 return NULL;
3370 if (view.len > INT_MAX) {
3371 PyBuffer_Release(&view);
3372 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3373 return NULL;
3374 }
3375 res = expat_parse(self, view.buf, (int)view.len, 0);
3376 PyBuffer_Release(&view);
3377 return res;
3378 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003379}
3380
3381static PyObject*
Eli Benderskya3699232013-05-19 18:47:23 -07003382xmlparser_parse_whole(XMLParserObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003383{
Eli Benderskya3699232013-05-19 18:47:23 -07003384 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003385 PyObject* reader;
3386 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003387 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003388 PyObject* res;
3389
3390 PyObject* fileobj;
3391 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3392 return NULL;
3393
3394 reader = PyObject_GetAttrString(fileobj, "read");
3395 if (!reader)
3396 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003397
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003398 /* read from open file object */
3399 for (;;) {
3400
3401 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3402
3403 if (!buffer) {
3404 /* read failed (e.g. due to KeyboardInterrupt) */
3405 Py_DECREF(reader);
3406 return NULL;
3407 }
3408
Eli Benderskyf996e772012-03-16 05:53:30 +02003409 if (PyUnicode_CheckExact(buffer)) {
3410 /* A unicode object is encoded into bytes using UTF-8 */
3411 if (PyUnicode_GET_SIZE(buffer) == 0) {
3412 Py_DECREF(buffer);
3413 break;
3414 }
3415 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003416 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003417 if (!temp) {
3418 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003419 Py_DECREF(reader);
3420 return NULL;
3421 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003422 buffer = temp;
3423 }
3424 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003425 Py_DECREF(buffer);
3426 break;
3427 }
3428
3429 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003430 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003431 );
3432
3433 Py_DECREF(buffer);
3434
3435 if (!res) {
3436 Py_DECREF(reader);
3437 return NULL;
3438 }
3439 Py_DECREF(res);
3440
3441 }
3442
3443 Py_DECREF(reader);
3444
3445 res = expat_parse(self, "", 0, 1);
3446
3447 if (res && TreeBuilder_CheckExact(self->target)) {
3448 Py_DECREF(res);
3449 return treebuilder_done((TreeBuilderObject*) self->target);
3450 }
3451
3452 return res;
3453}
3454
3455static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003456xmlparser_doctype(XMLParserObject *self, PyObject *args)
3457{
3458 Py_RETURN_NONE;
3459}
3460
3461static PyObject*
3462xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003463{
3464 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003465 Py_ssize_t i, seqlen;
3466 TreeBuilderObject *target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003467
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003468 PyObject *events_queue;
3469 PyObject *events_to_report = Py_None;
3470 PyObject *events_seq;
3471 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events_queue,
3472 &events_to_report))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003473 return NULL;
3474
3475 if (!TreeBuilder_CheckExact(self->target)) {
3476 PyErr_SetString(
3477 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003478 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003479 "targets"
3480 );
3481 return NULL;
3482 }
3483
3484 target = (TreeBuilderObject*) self->target;
3485
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003486 Py_INCREF(events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003487 Py_XDECREF(target->events);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003488 target->events = events_queue;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003489
3490 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003491 Py_CLEAR(target->start_event_obj);
3492 Py_CLEAR(target->end_event_obj);
3493 Py_CLEAR(target->start_ns_event_obj);
3494 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003495
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003496 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003497 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003498 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003499 Py_RETURN_NONE;
3500 }
3501
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003502 if (!(events_seq = PySequence_Fast(events_to_report,
3503 "events must be a sequence"))) {
3504 return NULL;
3505 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003506
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003507 seqlen = PySequence_Size(events_seq);
3508 for (i = 0; i < seqlen; ++i) {
3509 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3510 char *event_name = NULL;
3511 if (PyUnicode_Check(event_name_obj)) {
3512 event_name = _PyUnicode_AsString(event_name_obj);
3513 } else if (PyBytes_Check(event_name_obj)) {
3514 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003515 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003516
3517 if (event_name == NULL) {
3518 Py_DECREF(events_seq);
3519 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3520 return NULL;
3521 } else if (strcmp(event_name, "start") == 0) {
3522 Py_INCREF(event_name_obj);
3523 target->start_event_obj = event_name_obj;
3524 } else if (strcmp(event_name, "end") == 0) {
3525 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003526 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003527 target->end_event_obj = event_name_obj;
3528 } else if (strcmp(event_name, "start-ns") == 0) {
3529 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003530 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003531 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003532 EXPAT(SetNamespaceDeclHandler)(
3533 self->parser,
3534 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3535 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3536 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003537 } else if (strcmp(event_name, "end-ns") == 0) {
3538 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003539 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003540 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003541 EXPAT(SetNamespaceDeclHandler)(
3542 self->parser,
3543 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3544 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3545 );
3546 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003547 Py_DECREF(events_seq);
3548 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003549 return NULL;
3550 }
3551 }
3552
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003553 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003554 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003555}
3556
3557static PyMethodDef xmlparser_methods[] = {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003558 {"feed", (PyCFunction) xmlparser_feed, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003559 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
Eli Benderskya3699232013-05-19 18:47:23 -07003560 {"_parse_whole", (PyCFunction) xmlparser_parse_whole, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003561 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003562 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003563 {NULL, NULL}
3564};
3565
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003566static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003567xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003568{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003569 if (PyUnicode_Check(nameobj)) {
3570 PyObject* res;
3571 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3572 res = self->entity;
3573 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3574 res = self->target;
3575 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3576 return PyUnicode_FromFormat(
3577 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003578 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003579 }
3580 else
3581 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003582
Alexander Belopolskye239d232010-12-08 23:31:48 +00003583 Py_INCREF(res);
3584 return res;
3585 }
3586 generic:
3587 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003588}
3589
Neal Norwitz227b5332006-03-22 09:28:35 +00003590static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003591 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003592 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003593 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003594 (destructor)xmlparser_dealloc, /* tp_dealloc */
3595 0, /* tp_print */
3596 0, /* tp_getattr */
3597 0, /* tp_setattr */
3598 0, /* tp_reserved */
3599 0, /* tp_repr */
3600 0, /* tp_as_number */
3601 0, /* tp_as_sequence */
3602 0, /* tp_as_mapping */
3603 0, /* tp_hash */
3604 0, /* tp_call */
3605 0, /* tp_str */
3606 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3607 0, /* tp_setattro */
3608 0, /* tp_as_buffer */
3609 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3610 /* tp_flags */
3611 0, /* tp_doc */
3612 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3613 (inquiry)xmlparser_gc_clear, /* tp_clear */
3614 0, /* tp_richcompare */
3615 0, /* tp_weaklistoffset */
3616 0, /* tp_iter */
3617 0, /* tp_iternext */
3618 xmlparser_methods, /* tp_methods */
3619 0, /* tp_members */
3620 0, /* tp_getset */
3621 0, /* tp_base */
3622 0, /* tp_dict */
3623 0, /* tp_descr_get */
3624 0, /* tp_descr_set */
3625 0, /* tp_dictoffset */
3626 (initproc)xmlparser_init, /* tp_init */
3627 PyType_GenericAlloc, /* tp_alloc */
3628 xmlparser_new, /* tp_new */
3629 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003630};
3631
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003632/* ==================================================================== */
3633/* python module interface */
3634
3635static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003636 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003637 {NULL, NULL}
3638};
3639
Martin v. Löwis1a214512008-06-11 05:26:20 +00003640
3641static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003642 PyModuleDef_HEAD_INIT,
3643 "_elementtree",
3644 NULL,
3645 -1,
3646 _functions,
3647 NULL,
3648 NULL,
3649 NULL,
3650 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003651};
3652
Neal Norwitzf6657e62006-12-28 04:47:50 +00003653PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003654PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003655{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003656 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003657
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003658 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003659 if (PyType_Ready(&ElementIter_Type) < 0)
3660 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003661 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003662 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003663 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003664 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003665 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003666 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003667
Martin v. Löwis1a214512008-06-11 05:26:20 +00003668 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003669 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003670 return NULL;
3671
Eli Bendersky828efde2012-04-05 05:40:58 +03003672 if (!(temp = PyImport_ImportModule("copy")))
3673 return NULL;
3674 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3675 Py_XDECREF(temp);
3676
3677 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3678 return NULL;
3679
Eli Bendersky20d41742012-06-01 09:48:37 +03003680 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003681 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3682 if (expat_capi) {
3683 /* check that it's usable */
3684 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3685 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3686 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3687 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003688 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003689 PyErr_SetString(PyExc_ImportError,
3690 "pyexpat version is incompatible");
3691 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003692 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003693 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003694 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003695 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003696
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003697 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003698 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003699 );
3700 Py_INCREF(elementtree_parseerror_obj);
3701 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3702
Eli Bendersky092af1f2012-03-04 07:14:03 +02003703 Py_INCREF((PyObject *)&Element_Type);
3704 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3705
Eli Bendersky58d548d2012-05-29 15:45:16 +03003706 Py_INCREF((PyObject *)&TreeBuilder_Type);
3707 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3708
Eli Bendersky52467b12012-06-01 07:13:08 +03003709 Py_INCREF((PyObject *)&XMLParser_Type);
3710 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003711
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003712 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003713}