blob: 6cd2d3ad6f27de9f1193892c4eb2113eb9c464db [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
14#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030015#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000017/* -------------------------------------------------------------------- */
18/* configuration */
19
20/* Leave defined to include the expat-based XMLParser type */
21#define USE_EXPAT
22
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000023/* An element can hold this many children without extra memory
24 allocations. */
25#define STATIC_CHILDREN 4
26
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
31
32/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010033 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000034 that the number of children should be an even number, at least on
35 32-bit platforms. */
36
37/* -------------------------------------------------------------------- */
38
/* Allocation tracing hooks.  Change "#if 0" to "#if 1" to print a running
   byte total on every ALLOC/RELEASE.  In the default configuration both
   macros expand to nothing, so their arguments are never evaluated. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
49
/* compiler tweaks: LOCAL(type) introduces a file-private definition of the
   given type; under MSVC it additionally requests inlining and the
   fastcall calling convention */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
56
/* Helpers for the 'join' flag that is kept in the low bit of the text and
   tail object pointers.  Every use of text or tail as an object pointer
   must go through JOIN_OBJ; see the comments in the ElementObject
   definition for more info.

   JOIN_OBJ(p)        p with the flag bit masked off, as a PyObject*
   JOIN_GET(p)        the flag bit (0 or 1) stored in p
   JOIN_SET(p, flag)  the object pointer of p with 'flag' or-ed in */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000064
65/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000066static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000067static PyObject* elementtree_deepcopy_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000068static PyObject* elementpath_obj;
69
70/* helpers */
71
72LOCAL(PyObject*)
73deepcopy(PyObject* object, PyObject* memo)
74{
75 /* do a deep copy of the given object */
76
77 PyObject* args;
78 PyObject* result;
79
80 if (!elementtree_deepcopy_obj) {
81 PyErr_SetString(
82 PyExc_RuntimeError,
83 "deepcopy helper not found"
84 );
85 return NULL;
86 }
87
Antoine Pitrouc1948842012-10-01 23:40:37 +020088 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000089 if (!args)
90 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000091 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000092 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000093 return result;
94}
95
96LOCAL(PyObject*)
97list_join(PyObject* list)
98{
99 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000100 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000101 PyObject* result;
102
Antoine Pitrouc1948842012-10-01 23:40:37 +0200103 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000104 if (!joiner)
105 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200106 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000107 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200108 if (result)
109 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000110 return result;
111}
112
Eli Bendersky48d358b2012-05-30 17:57:50 +0300113/* Is the given object an empty dictionary?
114*/
115static int
116is_empty_dict(PyObject *obj)
117{
118 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
119}
120
121
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000122/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200123/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000124
125typedef struct {
126
127 /* attributes (a dictionary object), or None if no attributes */
128 PyObject* attrib;
129
130 /* child elements */
131 int length; /* actual number of items */
132 int allocated; /* allocated items */
133
134 /* this either points to _children or to a malloced buffer */
135 PyObject* *children;
136
137 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100138
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139} ElementObjectExtra;
140
141typedef struct {
142 PyObject_HEAD
143
144 /* element tag (a string). */
145 PyObject* tag;
146
147 /* text before first child. note that this is a tagged pointer;
148 use JOIN_OBJ to get the object pointer. the join flag is used
149 to distinguish lists created by the tree builder from lists
150 assigned to the attribute by application code; the former
151 should be joined before being returned to the user, the latter
152 should be left intact. */
153 PyObject* text;
154
155 /* text after this element, in parent. note that this is a tagged
156 pointer; use JOIN_OBJ to get the object pointer. */
157 PyObject* tail;
158
159 ElementObjectExtra* extra;
160
Eli Benderskyebf37a22012-04-03 22:02:37 +0300161 PyObject *weakreflist; /* For tp_weaklistoffset */
162
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000163} ElementObject;
164
Neal Norwitz227b5332006-03-22 09:28:35 +0000165static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000166
Christian Heimes90aa7642007-12-19 02:45:37 +0000167#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168
169/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200170/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000171
172LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200173create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000174{
175 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
176 if (!self->extra)
177 return -1;
178
179 if (!attrib)
180 attrib = Py_None;
181
182 Py_INCREF(attrib);
183 self->extra->attrib = attrib;
184
185 self->extra->length = 0;
186 self->extra->allocated = STATIC_CHILDREN;
187 self->extra->children = self->extra->_children;
188
189 return 0;
190}
191
192LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200193dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000194{
Eli Bendersky08b85292012-04-04 15:55:07 +0300195 ElementObjectExtra *myextra;
196 int i;
197
Eli Benderskyebf37a22012-04-03 22:02:37 +0300198 if (!self->extra)
199 return;
200
201 /* Avoid DECREFs calling into this code again (cycles, etc.)
202 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300203 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300204 self->extra = NULL;
205
206 Py_DECREF(myextra->attrib);
207
Eli Benderskyebf37a22012-04-03 22:02:37 +0300208 for (i = 0; i < myextra->length; i++)
209 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210
Eli Benderskyebf37a22012-04-03 22:02:37 +0300211 if (myextra->children != myextra->_children)
212 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000213
Eli Benderskyebf37a22012-04-03 22:02:37 +0300214 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215}
216
Eli Bendersky092af1f2012-03-04 07:14:03 +0200217/* Convenience internal function to create new Element objects with the given
218 * tag and attributes.
219*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000220LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200221create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222{
223 ElementObject* self;
224
Eli Bendersky0192ba32012-03-30 16:38:33 +0300225 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000226 if (self == NULL)
227 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000228 self->extra = NULL;
229
Eli Bendersky48d358b2012-05-30 17:57:50 +0300230 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000232 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000233 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000234 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235 }
236
237 Py_INCREF(tag);
238 self->tag = tag;
239
240 Py_INCREF(Py_None);
241 self->text = Py_None;
242
243 Py_INCREF(Py_None);
244 self->tail = Py_None;
245
Eli Benderskyebf37a22012-04-03 22:02:37 +0300246 self->weakreflist = NULL;
247
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000248 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300249 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000250 return (PyObject*) self;
251}
252
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253static PyObject *
254element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
255{
256 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
257 if (e != NULL) {
258 Py_INCREF(Py_None);
259 e->tag = Py_None;
260
261 Py_INCREF(Py_None);
262 e->text = Py_None;
263
264 Py_INCREF(Py_None);
265 e->tail = Py_None;
266
267 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300268 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200269 }
270 return (PyObject *)e;
271}
272
Eli Bendersky737b1732012-05-29 06:02:56 +0300273/* Helper function for extracting the attrib dictionary from a keywords dict.
274 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800275 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300276 * directly into *kwds.
277 * If there is no 'attrib' keyword, return an empty dict.
278 */
279static PyObject*
280get_attrib_from_keywords(PyObject *kwds)
281{
282 PyObject *attrib_str = PyUnicode_FromString("attrib");
283 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
284
285 if (attrib) {
286 /* If attrib was found in kwds, copy its value and remove it from
287 * kwds
288 */
289 if (!PyDict_Check(attrib)) {
290 Py_DECREF(attrib_str);
291 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
292 Py_TYPE(attrib)->tp_name);
293 return NULL;
294 }
295 attrib = PyDict_Copy(attrib);
296 PyDict_DelItem(kwds, attrib_str);
297 } else {
298 attrib = PyDict_New();
299 }
300
301 Py_DECREF(attrib_str);
302
303 if (attrib)
304 PyDict_Update(attrib, kwds);
305 return attrib;
306}
307
Eli Bendersky092af1f2012-03-04 07:14:03 +0200308static int
309element_init(PyObject *self, PyObject *args, PyObject *kwds)
310{
311 PyObject *tag;
312 PyObject *tmp;
313 PyObject *attrib = NULL;
314 ElementObject *self_elem;
315
316 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
317 return -1;
318
Eli Bendersky737b1732012-05-29 06:02:56 +0300319 if (attrib) {
320 /* attrib passed as positional arg */
321 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200322 if (!attrib)
323 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300324 if (kwds) {
325 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200326 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300327 return -1;
328 }
329 }
330 } else if (kwds) {
331 /* have keywords args */
332 attrib = get_attrib_from_keywords(kwds);
333 if (!attrib)
334 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200335 }
336
337 self_elem = (ElementObject *)self;
338
Antoine Pitrouc1948842012-10-01 23:40:37 +0200339 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200340 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200341 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200342 return -1;
343 }
344 }
345
Eli Bendersky48d358b2012-05-30 17:57:50 +0300346 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200347 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200348
349 /* Replace the objects already pointed to by tag, text and tail. */
350 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200351 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200352 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200353 Py_DECREF(tmp);
354
355 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200356 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200357 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200358 Py_DECREF(JOIN_OBJ(tmp));
359
360 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200361 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200362 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200363 Py_DECREF(JOIN_OBJ(tmp));
364
365 return 0;
366}
367
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000368LOCAL(int)
369element_resize(ElementObject* self, int extra)
370{
371 int size;
372 PyObject* *children;
373
374 /* make sure self->children can hold the given number of extra
375 elements. set an exception and return -1 if allocation failed */
376
377 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200378 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000379
380 size = self->extra->length + extra;
381
382 if (size > self->extra->allocated) {
383 /* use Python 2.4's list growth strategy */
384 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000385 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100386 * which needs at least 4 bytes.
387 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000388 * be safe.
389 */
390 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000391 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000392 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100393 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000394 * false alarm always assume at least one child to be safe.
395 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000396 children = PyObject_Realloc(self->extra->children,
397 size * sizeof(PyObject*));
398 if (!children)
399 goto nomemory;
400 } else {
401 children = PyObject_Malloc(size * sizeof(PyObject*));
402 if (!children)
403 goto nomemory;
404 /* copy existing children from static area to malloc buffer */
405 memcpy(children, self->extra->children,
406 self->extra->length * sizeof(PyObject*));
407 }
408 self->extra->children = children;
409 self->extra->allocated = size;
410 }
411
412 return 0;
413
414 nomemory:
415 PyErr_NoMemory();
416 return -1;
417}
418
419LOCAL(int)
420element_add_subelement(ElementObject* self, PyObject* element)
421{
422 /* add a child element to a parent */
423
424 if (element_resize(self, 1) < 0)
425 return -1;
426
427 Py_INCREF(element);
428 self->extra->children[self->extra->length] = element;
429
430 self->extra->length++;
431
432 return 0;
433}
434
435LOCAL(PyObject*)
436element_get_attrib(ElementObject* self)
437{
438 /* return borrowed reference to attrib dictionary */
439 /* note: this function assumes that the extra section exists */
440
441 PyObject* res = self->extra->attrib;
442
443 if (res == Py_None) {
444 /* create missing dictionary */
445 res = PyDict_New();
446 if (!res)
447 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200448 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000449 self->extra->attrib = res;
450 }
451
452 return res;
453}
454
455LOCAL(PyObject*)
456element_get_text(ElementObject* self)
457{
458 /* return borrowed reference to text attribute */
459
460 PyObject* res = self->text;
461
462 if (JOIN_GET(res)) {
463 res = JOIN_OBJ(res);
464 if (PyList_CheckExact(res)) {
465 res = list_join(res);
466 if (!res)
467 return NULL;
468 self->text = res;
469 }
470 }
471
472 return res;
473}
474
475LOCAL(PyObject*)
476element_get_tail(ElementObject* self)
477{
478 /* return borrowed reference to text attribute */
479
480 PyObject* res = self->tail;
481
482 if (JOIN_GET(res)) {
483 res = JOIN_OBJ(res);
484 if (PyList_CheckExact(res)) {
485 res = list_join(res);
486 if (!res)
487 return NULL;
488 self->tail = res;
489 }
490 }
491
492 return res;
493}
494
495static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300496subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000497{
498 PyObject* elem;
499
500 ElementObject* parent;
501 PyObject* tag;
502 PyObject* attrib = NULL;
503 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
504 &Element_Type, &parent, &tag,
505 &PyDict_Type, &attrib))
506 return NULL;
507
Eli Bendersky737b1732012-05-29 06:02:56 +0300508 if (attrib) {
509 /* attrib passed as positional arg */
510 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000511 if (!attrib)
512 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300513 if (kwds) {
514 if (PyDict_Update(attrib, kwds) < 0) {
515 return NULL;
516 }
517 }
518 } else if (kwds) {
519 /* have keyword args */
520 attrib = get_attrib_from_keywords(kwds);
521 if (!attrib)
522 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000523 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300524 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000525 Py_INCREF(Py_None);
526 attrib = Py_None;
527 }
528
Eli Bendersky092af1f2012-03-04 07:14:03 +0200529 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000530
531 Py_DECREF(attrib);
532
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000533 if (element_add_subelement(parent, elem) < 0) {
534 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000535 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000536 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000537
538 return elem;
539}
540
Eli Bendersky0192ba32012-03-30 16:38:33 +0300541static int
542element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
543{
544 Py_VISIT(self->tag);
545 Py_VISIT(JOIN_OBJ(self->text));
546 Py_VISIT(JOIN_OBJ(self->tail));
547
548 if (self->extra) {
549 int i;
550 Py_VISIT(self->extra->attrib);
551
552 for (i = 0; i < self->extra->length; ++i)
553 Py_VISIT(self->extra->children[i]);
554 }
555 return 0;
556}
557
558static int
559element_gc_clear(ElementObject *self)
560{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300561 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300562
563 /* The following is like Py_CLEAR for self->text and self->tail, but
564 * written explicitily because the real pointers hide behind access
565 * macros.
566 */
567 if (self->text) {
568 PyObject *tmp = JOIN_OBJ(self->text);
569 self->text = NULL;
570 Py_DECREF(tmp);
571 }
572
573 if (self->tail) {
574 PyObject *tmp = JOIN_OBJ(self->tail);
575 self->tail = NULL;
576 Py_DECREF(tmp);
577 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300578
579 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300580 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300581 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300582 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300583 return 0;
584}
585
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000586static void
587element_dealloc(ElementObject* self)
588{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300589 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300590
591 if (self->weakreflist != NULL)
592 PyObject_ClearWeakRefs((PyObject *) self);
593
Eli Bendersky0192ba32012-03-30 16:38:33 +0300594 /* element_gc_clear clears all references and deallocates extra
595 */
596 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000597
598 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200599 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000600}
601
602/* -------------------------------------------------------------------- */
603/* methods (in alphabetical order) */
604
605static PyObject*
606element_append(ElementObject* self, PyObject* args)
607{
608 PyObject* element;
609 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
610 return NULL;
611
612 if (element_add_subelement(self, element) < 0)
613 return NULL;
614
615 Py_RETURN_NONE;
616}
617
618static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300619element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000620{
621 if (!PyArg_ParseTuple(args, ":clear"))
622 return NULL;
623
Eli Benderskyebf37a22012-04-03 22:02:37 +0300624 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625
626 Py_INCREF(Py_None);
627 Py_DECREF(JOIN_OBJ(self->text));
628 self->text = Py_None;
629
630 Py_INCREF(Py_None);
631 Py_DECREF(JOIN_OBJ(self->tail));
632 self->tail = Py_None;
633
634 Py_RETURN_NONE;
635}
636
637static PyObject*
638element_copy(ElementObject* self, PyObject* args)
639{
640 int i;
641 ElementObject* element;
642
643 if (!PyArg_ParseTuple(args, ":__copy__"))
644 return NULL;
645
Eli Bendersky092af1f2012-03-04 07:14:03 +0200646 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000647 self->tag, (self->extra) ? self->extra->attrib : Py_None
648 );
649 if (!element)
650 return NULL;
651
652 Py_DECREF(JOIN_OBJ(element->text));
653 element->text = self->text;
654 Py_INCREF(JOIN_OBJ(element->text));
655
656 Py_DECREF(JOIN_OBJ(element->tail));
657 element->tail = self->tail;
658 Py_INCREF(JOIN_OBJ(element->tail));
659
660 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100661
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000662 if (element_resize(element, self->extra->length) < 0) {
663 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000664 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000665 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000666
667 for (i = 0; i < self->extra->length; i++) {
668 Py_INCREF(self->extra->children[i]);
669 element->extra->children[i] = self->extra->children[i];
670 }
671
672 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100673
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000674 }
675
676 return (PyObject*) element;
677}
678
679static PyObject*
680element_deepcopy(ElementObject* self, PyObject* args)
681{
682 int i;
683 ElementObject* element;
684 PyObject* tag;
685 PyObject* attrib;
686 PyObject* text;
687 PyObject* tail;
688 PyObject* id;
689
690 PyObject* memo;
691 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
692 return NULL;
693
694 tag = deepcopy(self->tag, memo);
695 if (!tag)
696 return NULL;
697
698 if (self->extra) {
699 attrib = deepcopy(self->extra->attrib, memo);
700 if (!attrib) {
701 Py_DECREF(tag);
702 return NULL;
703 }
704 } else {
705 Py_INCREF(Py_None);
706 attrib = Py_None;
707 }
708
Eli Bendersky092af1f2012-03-04 07:14:03 +0200709 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000710
711 Py_DECREF(tag);
712 Py_DECREF(attrib);
713
714 if (!element)
715 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100716
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000717 text = deepcopy(JOIN_OBJ(self->text), memo);
718 if (!text)
719 goto error;
720 Py_DECREF(element->text);
721 element->text = JOIN_SET(text, JOIN_GET(self->text));
722
723 tail = deepcopy(JOIN_OBJ(self->tail), memo);
724 if (!tail)
725 goto error;
726 Py_DECREF(element->tail);
727 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
728
729 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100730
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000731 if (element_resize(element, self->extra->length) < 0)
732 goto error;
733
734 for (i = 0; i < self->extra->length; i++) {
735 PyObject* child = deepcopy(self->extra->children[i], memo);
736 if (!child) {
737 element->extra->length = i;
738 goto error;
739 }
740 element->extra->children[i] = child;
741 }
742
743 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100744
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745 }
746
747 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200748 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000749 if (!id)
750 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000751
752 i = PyDict_SetItem(memo, id, (PyObject*) element);
753
754 Py_DECREF(id);
755
756 if (i < 0)
757 goto error;
758
759 return (PyObject*) element;
760
761 error:
762 Py_DECREF(element);
763 return NULL;
764}
765
Martin v. Löwisbce16662012-06-17 10:41:22 +0200766static PyObject*
767element_sizeof(PyObject* _self, PyObject* args)
768{
769 ElementObject *self = (ElementObject*)_self;
770 Py_ssize_t result = sizeof(ElementObject);
771 if (self->extra) {
772 result += sizeof(ElementObjectExtra);
773 if (self->extra->children != self->extra->_children)
774 result += sizeof(PyObject*) * self->extra->allocated;
775 }
776 return PyLong_FromSsize_t(result);
777}
778
/* Dictionary keys shared by __getstate__ and __setstate__. */
#define PICKLED_TAG         "tag"
#define PICKLED_CHILDREN    "_children"
#define PICKLED_ATTRIB      "attrib"
#define PICKLED_TAIL        "tail"
#define PICKLED_TEXT        "text"
785
786/* __getstate__ returns a fabricated instance dict as in the pure-Python
787 * Element implementation, for interoperability/interchangeability. This
788 * makes the pure-Python implementation details an API, but (a) there aren't
789 * any unnecessary structures there; and (b) it buys compatibility with 3.2
790 * pickles. See issue #16076.
791 */
792static PyObject *
793element_getstate(ElementObject *self)
794{
795 int i, noattrib;
796 PyObject *instancedict = NULL, *children;
797
798 /* Build a list of children. */
799 children = PyList_New(self->extra ? self->extra->length : 0);
800 if (!children)
801 return NULL;
802 for (i = 0; i < PyList_GET_SIZE(children); i++) {
803 PyObject *child = self->extra->children[i];
804 Py_INCREF(child);
805 PyList_SET_ITEM(children, i, child);
806 }
807
808 /* Construct the state object. */
809 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
810 if (noattrib)
811 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
812 PICKLED_TAG, self->tag,
813 PICKLED_CHILDREN, children,
814 PICKLED_ATTRIB,
815 PICKLED_TEXT, self->text,
816 PICKLED_TAIL, self->tail);
817 else
818 instancedict = Py_BuildValue("{sOsOsOsOsO}",
819 PICKLED_TAG, self->tag,
820 PICKLED_CHILDREN, children,
821 PICKLED_ATTRIB, self->extra->attrib,
822 PICKLED_TEXT, self->text,
823 PICKLED_TAIL, self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800824 if (instancedict) {
825 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800826 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800827 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800828 else {
829 for (i = 0; i < PyList_GET_SIZE(children); i++)
830 Py_DECREF(PyList_GET_ITEM(children, i));
831 Py_DECREF(children);
832
833 return NULL;
834 }
835}
836
837static PyObject *
838element_setstate_from_attributes(ElementObject *self,
839 PyObject *tag,
840 PyObject *attrib,
841 PyObject *text,
842 PyObject *tail,
843 PyObject *children)
844{
845 Py_ssize_t i, nchildren;
846
847 if (!tag) {
848 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
849 return NULL;
850 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800851
852 Py_CLEAR(self->tag);
853 self->tag = tag;
854 Py_INCREF(self->tag);
855
856 Py_CLEAR(self->text);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800857 self->text = text ? text : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800858 Py_INCREF(self->text);
859
860 Py_CLEAR(self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800861 self->tail = tail ? tail : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800862 Py_INCREF(self->tail);
863
864 /* Handle ATTRIB and CHILDREN. */
865 if (!children && !attrib)
866 Py_RETURN_NONE;
867
868 /* Compute 'nchildren'. */
869 if (children) {
870 if (!PyList_Check(children)) {
871 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
872 return NULL;
873 }
874 nchildren = PyList_Size(children);
875 }
876 else {
877 nchildren = 0;
878 }
879
880 /* Allocate 'extra'. */
881 if (element_resize(self, nchildren)) {
882 return NULL;
883 }
884 assert(self->extra && self->extra->allocated >= nchildren);
885
886 /* Copy children */
887 for (i = 0; i < nchildren; i++) {
888 self->extra->children[i] = PyList_GET_ITEM(children, i);
889 Py_INCREF(self->extra->children[i]);
890 }
891
892 self->extra->length = nchildren;
893 self->extra->allocated = nchildren;
894
895 /* Stash attrib. */
896 if (attrib) {
897 Py_CLEAR(self->extra->attrib);
898 self->extra->attrib = attrib;
899 Py_INCREF(attrib);
900 }
901
902 Py_RETURN_NONE;
903}
904
905/* __setstate__ for Element instance from the Python implementation.
906 * 'state' should be the instance dict.
907 */
908static PyObject *
909element_setstate_from_Python(ElementObject *self, PyObject *state)
910{
911 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
912 PICKLED_TAIL, PICKLED_CHILDREN, 0};
913 PyObject *args;
914 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800915 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800916
Eli Bendersky698bdb22013-01-10 06:01:06 -0800917 tag = attrib = text = tail = children = NULL;
918 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800919 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800920 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800921
922 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
923 &attrib, &text, &tail, &children))
924 retval = element_setstate_from_attributes(self, tag, attrib, text,
925 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800926 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800927 retval = NULL;
928
929 Py_DECREF(args);
930 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800931}
932
933static PyObject *
934element_setstate(ElementObject *self, PyObject *state)
935{
936 if (!PyDict_CheckExact(state)) {
937 PyErr_Format(PyExc_TypeError,
938 "Don't know how to unpickle \"%.200R\" as an Element",
939 state);
940 return NULL;
941 }
942 else
943 return element_setstate_from_Python(self, state);
944}
945
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000946LOCAL(int)
947checkpath(PyObject* tag)
948{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000949 Py_ssize_t i;
950 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000951
952 /* check if a tag contains an xpath character */
953
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000954#define PATHCHAR(ch) \
955 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000956
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000957 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200958 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
959 void *data = PyUnicode_DATA(tag);
960 unsigned int kind = PyUnicode_KIND(tag);
961 for (i = 0; i < len; i++) {
962 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
963 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000964 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200965 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000966 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200967 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000968 return 1;
969 }
970 return 0;
971 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000972 if (PyBytes_Check(tag)) {
973 char *p = PyBytes_AS_STRING(tag);
974 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000975 if (p[i] == '{')
976 check = 0;
977 else if (p[i] == '}')
978 check = 1;
979 else if (check && PATHCHAR(p[i]))
980 return 1;
981 }
982 return 0;
983 }
984
985 return 1; /* unknown type; might be path expression */
986}
987
988static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000989element_extend(ElementObject* self, PyObject* args)
990{
991 PyObject* seq;
992 Py_ssize_t i, seqlen = 0;
993
994 PyObject* seq_in;
995 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
996 return NULL;
997
998 seq = PySequence_Fast(seq_in, "");
999 if (!seq) {
1000 PyErr_Format(
1001 PyExc_TypeError,
1002 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1003 );
1004 return NULL;
1005 }
1006
1007 seqlen = PySequence_Size(seq);
1008 for (i = 0; i < seqlen; i++) {
1009 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001010 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1011 Py_DECREF(seq);
1012 PyErr_Format(
1013 PyExc_TypeError,
1014 "expected an Element, not \"%.200s\"",
1015 Py_TYPE(element)->tp_name);
1016 return NULL;
1017 }
1018
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001019 if (element_add_subelement(self, element) < 0) {
1020 Py_DECREF(seq);
1021 return NULL;
1022 }
1023 }
1024
1025 Py_DECREF(seq);
1026
1027 Py_RETURN_NONE;
1028}
1029
1030static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001031element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001032{
1033 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001034 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001035 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001036 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001037
Eli Bendersky737b1732012-05-29 06:02:56 +03001038 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1039 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001040 return NULL;
1041
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001042 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001043 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001044 return _PyObject_CallMethodId(
1045 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001046 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001047 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001048
1049 if (!self->extra)
1050 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001051
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001052 for (i = 0; i < self->extra->length; i++) {
1053 PyObject* item = self->extra->children[i];
1054 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001055 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001056 Py_INCREF(item);
1057 return item;
1058 }
1059 }
1060
1061 Py_RETURN_NONE;
1062}
1063
1064static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001065element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001066{
1067 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001068 PyObject* tag;
1069 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001070 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001071 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001072 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001073
Eli Bendersky737b1732012-05-29 06:02:56 +03001074 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1075 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001076 return NULL;
1077
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001078 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001079 return _PyObject_CallMethodId(
1080 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081 );
1082
1083 if (!self->extra) {
1084 Py_INCREF(default_value);
1085 return default_value;
1086 }
1087
1088 for (i = 0; i < self->extra->length; i++) {
1089 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +00001090 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
1091
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001092 PyObject* text = element_get_text(item);
1093 if (text == Py_None)
Eli Bendersky25771b32013-01-13 05:26:07 -08001094 return PyUnicode_New(0, 0);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001095 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001096 return text;
1097 }
1098 }
1099
1100 Py_INCREF(default_value);
1101 return default_value;
1102}
1103
1104static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001105element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001106{
1107 int i;
1108 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001109 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001110 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001111 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001112
Eli Bendersky737b1732012-05-29 06:02:56 +03001113 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1114 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001115 return NULL;
1116
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001117 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001118 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001119 return _PyObject_CallMethodId(
1120 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001121 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001122 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001123
1124 out = PyList_New(0);
1125 if (!out)
1126 return NULL;
1127
1128 if (!self->extra)
1129 return out;
1130
1131 for (i = 0; i < self->extra->length; i++) {
1132 PyObject* item = self->extra->children[i];
1133 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001134 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001135 if (PyList_Append(out, item) < 0) {
1136 Py_DECREF(out);
1137 return NULL;
1138 }
1139 }
1140 }
1141
1142 return out;
1143}
1144
1145static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001146element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001147{
1148 PyObject* tag;
1149 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001150 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001151 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001152
Eli Bendersky737b1732012-05-29 06:02:56 +03001153 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1154 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001155 return NULL;
1156
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001157 return _PyObject_CallMethodId(
1158 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001159 );
1160}
1161
1162static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001163element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001164{
1165 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001166 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001167
1168 PyObject* key;
1169 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001170
1171 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1172 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001173 return NULL;
1174
1175 if (!self->extra || self->extra->attrib == Py_None)
1176 value = default_value;
1177 else {
1178 value = PyDict_GetItem(self->extra->attrib, key);
1179 if (!value)
1180 value = default_value;
1181 }
1182
1183 Py_INCREF(value);
1184 return value;
1185}
1186
1187static PyObject*
1188element_getchildren(ElementObject* self, PyObject* args)
1189{
1190 int i;
1191 PyObject* list;
1192
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001193 /* FIXME: report as deprecated? */
1194
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001195 if (!PyArg_ParseTuple(args, ":getchildren"))
1196 return NULL;
1197
1198 if (!self->extra)
1199 return PyList_New(0);
1200
1201 list = PyList_New(self->extra->length);
1202 if (!list)
1203 return NULL;
1204
1205 for (i = 0; i < self->extra->length; i++) {
1206 PyObject* item = self->extra->children[i];
1207 Py_INCREF(item);
1208 PyList_SET_ITEM(list, i, item);
1209 }
1210
1211 return list;
1212}
1213
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001214
Eli Bendersky64d11e62012-06-15 07:42:50 +03001215static PyObject *
1216create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1217
1218
1219static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001220element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001221{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001222 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001223 static char* kwlist[] = {"tag", 0};
1224
1225 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226 return NULL;
1227
Eli Bendersky64d11e62012-06-15 07:42:50 +03001228 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001229}
1230
1231
1232static PyObject*
1233element_itertext(ElementObject* self, PyObject* args)
1234{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001235 if (!PyArg_ParseTuple(args, ":itertext"))
1236 return NULL;
1237
Eli Bendersky64d11e62012-06-15 07:42:50 +03001238 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001239}
1240
Eli Bendersky64d11e62012-06-15 07:42:50 +03001241
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001242static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001243element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001245 ElementObject* self = (ElementObject*) self_;
1246
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001247 if (!self->extra || index < 0 || index >= self->extra->length) {
1248 PyErr_SetString(
1249 PyExc_IndexError,
1250 "child index out of range"
1251 );
1252 return NULL;
1253 }
1254
1255 Py_INCREF(self->extra->children[index]);
1256 return self->extra->children[index];
1257}
1258
1259static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001260element_insert(ElementObject* self, PyObject* args)
1261{
1262 int i;
1263
1264 int index;
1265 PyObject* element;
1266 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1267 &Element_Type, &element))
1268 return NULL;
1269
1270 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001271 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001272
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001273 if (index < 0) {
1274 index += self->extra->length;
1275 if (index < 0)
1276 index = 0;
1277 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001278 if (index > self->extra->length)
1279 index = self->extra->length;
1280
1281 if (element_resize(self, 1) < 0)
1282 return NULL;
1283
1284 for (i = self->extra->length; i > index; i--)
1285 self->extra->children[i] = self->extra->children[i-1];
1286
1287 Py_INCREF(element);
1288 self->extra->children[index] = element;
1289
1290 self->extra->length++;
1291
1292 Py_RETURN_NONE;
1293}
1294
1295static PyObject*
1296element_items(ElementObject* self, PyObject* args)
1297{
1298 if (!PyArg_ParseTuple(args, ":items"))
1299 return NULL;
1300
1301 if (!self->extra || self->extra->attrib == Py_None)
1302 return PyList_New(0);
1303
1304 return PyDict_Items(self->extra->attrib);
1305}
1306
1307static PyObject*
1308element_keys(ElementObject* self, PyObject* args)
1309{
1310 if (!PyArg_ParseTuple(args, ":keys"))
1311 return NULL;
1312
1313 if (!self->extra || self->extra->attrib == Py_None)
1314 return PyList_New(0);
1315
1316 return PyDict_Keys(self->extra->attrib);
1317}
1318
Martin v. Löwis18e16552006-02-15 17:27:45 +00001319static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001320element_length(ElementObject* self)
1321{
1322 if (!self->extra)
1323 return 0;
1324
1325 return self->extra->length;
1326}
1327
1328static PyObject*
1329element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1330{
1331 PyObject* elem;
1332
1333 PyObject* tag;
1334 PyObject* attrib;
1335 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1336 return NULL;
1337
1338 attrib = PyDict_Copy(attrib);
1339 if (!attrib)
1340 return NULL;
1341
Eli Bendersky092af1f2012-03-04 07:14:03 +02001342 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001343
1344 Py_DECREF(attrib);
1345
1346 return elem;
1347}
1348
1349static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001350element_remove(ElementObject* self, PyObject* args)
1351{
1352 int i;
1353
1354 PyObject* element;
1355 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1356 return NULL;
1357
1358 if (!self->extra) {
1359 /* element has no children, so raise exception */
1360 PyErr_SetString(
1361 PyExc_ValueError,
1362 "list.remove(x): x not in list"
1363 );
1364 return NULL;
1365 }
1366
1367 for (i = 0; i < self->extra->length; i++) {
1368 if (self->extra->children[i] == element)
1369 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001370 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001371 break;
1372 }
1373
1374 if (i == self->extra->length) {
1375 /* element is not in children, so raise exception */
1376 PyErr_SetString(
1377 PyExc_ValueError,
1378 "list.remove(x): x not in list"
1379 );
1380 return NULL;
1381 }
1382
1383 Py_DECREF(self->extra->children[i]);
1384
1385 self->extra->length--;
1386
1387 for (; i < self->extra->length; i++)
1388 self->extra->children[i] = self->extra->children[i+1];
1389
1390 Py_RETURN_NONE;
1391}
1392
1393static PyObject*
1394element_repr(ElementObject* self)
1395{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001396 if (self->tag)
1397 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1398 else
1399 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001400}
1401
1402static PyObject*
1403element_set(ElementObject* self, PyObject* args)
1404{
1405 PyObject* attrib;
1406
1407 PyObject* key;
1408 PyObject* value;
1409 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1410 return NULL;
1411
1412 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001413 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001414
1415 attrib = element_get_attrib(self);
1416 if (!attrib)
1417 return NULL;
1418
1419 if (PyDict_SetItem(attrib, key, value) < 0)
1420 return NULL;
1421
1422 Py_RETURN_NONE;
1423}
1424
1425static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001426element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001427{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001428 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001429 int i;
1430 PyObject* old;
1431
1432 if (!self->extra || index < 0 || index >= self->extra->length) {
1433 PyErr_SetString(
1434 PyExc_IndexError,
1435 "child assignment index out of range");
1436 return -1;
1437 }
1438
1439 old = self->extra->children[index];
1440
1441 if (item) {
1442 Py_INCREF(item);
1443 self->extra->children[index] = item;
1444 } else {
1445 self->extra->length--;
1446 for (i = index; i < self->extra->length; i++)
1447 self->extra->children[i] = self->extra->children[i+1];
1448 }
1449
1450 Py_DECREF(old);
1451
1452 return 0;
1453}
1454
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001455static PyObject*
1456element_subscr(PyObject* self_, PyObject* item)
1457{
1458 ElementObject* self = (ElementObject*) self_;
1459
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001460 if (PyIndex_Check(item)) {
1461 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001462
1463 if (i == -1 && PyErr_Occurred()) {
1464 return NULL;
1465 }
1466 if (i < 0 && self->extra)
1467 i += self->extra->length;
1468 return element_getitem(self_, i);
1469 }
1470 else if (PySlice_Check(item)) {
1471 Py_ssize_t start, stop, step, slicelen, cur, i;
1472 PyObject* list;
1473
1474 if (!self->extra)
1475 return PyList_New(0);
1476
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001477 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001478 self->extra->length,
1479 &start, &stop, &step, &slicelen) < 0) {
1480 return NULL;
1481 }
1482
1483 if (slicelen <= 0)
1484 return PyList_New(0);
1485 else {
1486 list = PyList_New(slicelen);
1487 if (!list)
1488 return NULL;
1489
1490 for (cur = start, i = 0; i < slicelen;
1491 cur += step, i++) {
1492 PyObject* item = self->extra->children[cur];
1493 Py_INCREF(item);
1494 PyList_SET_ITEM(list, i, item);
1495 }
1496
1497 return list;
1498 }
1499 }
1500 else {
1501 PyErr_SetString(PyExc_TypeError,
1502 "element indices must be integers");
1503 return NULL;
1504 }
1505}
1506
1507static int
1508element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1509{
1510 ElementObject* self = (ElementObject*) self_;
1511
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001512 if (PyIndex_Check(item)) {
1513 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001514
1515 if (i == -1 && PyErr_Occurred()) {
1516 return -1;
1517 }
1518 if (i < 0 && self->extra)
1519 i += self->extra->length;
1520 return element_setitem(self_, i, value);
1521 }
1522 else if (PySlice_Check(item)) {
1523 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1524
1525 PyObject* recycle = NULL;
1526 PyObject* seq = NULL;
1527
1528 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001529 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001530
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001531 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001532 self->extra->length,
1533 &start, &stop, &step, &slicelen) < 0) {
1534 return -1;
1535 }
1536
Eli Bendersky865756a2012-03-09 13:38:15 +02001537 if (value == NULL) {
1538 /* Delete slice */
1539 size_t cur;
1540 Py_ssize_t i;
1541
1542 if (slicelen <= 0)
1543 return 0;
1544
1545 /* Since we're deleting, the direction of the range doesn't matter,
1546 * so for simplicity make it always ascending.
1547 */
1548 if (step < 0) {
1549 stop = start + 1;
1550 start = stop + step * (slicelen - 1) - 1;
1551 step = -step;
1552 }
1553
1554 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1555
1556 /* recycle is a list that will contain all the children
1557 * scheduled for removal.
1558 */
1559 if (!(recycle = PyList_New(slicelen))) {
1560 PyErr_NoMemory();
1561 return -1;
1562 }
1563
1564 /* This loop walks over all the children that have to be deleted,
1565 * with cur pointing at them. num_moved is the amount of children
1566 * until the next deleted child that have to be "shifted down" to
1567 * occupy the deleted's places.
1568 * Note that in the ith iteration, shifting is done i+i places down
1569 * because i children were already removed.
1570 */
1571 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1572 /* Compute how many children have to be moved, clipping at the
1573 * list end.
1574 */
1575 Py_ssize_t num_moved = step - 1;
1576 if (cur + step >= (size_t)self->extra->length) {
1577 num_moved = self->extra->length - cur - 1;
1578 }
1579
1580 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1581
1582 memmove(
1583 self->extra->children + cur - i,
1584 self->extra->children + cur + 1,
1585 num_moved * sizeof(PyObject *));
1586 }
1587
1588 /* Leftover "tail" after the last removed child */
1589 cur = start + (size_t)slicelen * step;
1590 if (cur < (size_t)self->extra->length) {
1591 memmove(
1592 self->extra->children + cur - slicelen,
1593 self->extra->children + cur,
1594 (self->extra->length - cur) * sizeof(PyObject *));
1595 }
1596
1597 self->extra->length -= slicelen;
1598
1599 /* Discard the recycle list with all the deleted sub-elements */
1600 Py_XDECREF(recycle);
1601 return 0;
1602 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001603 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001604 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001605 seq = PySequence_Fast(value, "");
1606 if (!seq) {
1607 PyErr_Format(
1608 PyExc_TypeError,
1609 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1610 );
1611 return -1;
1612 }
1613 newlen = PySequence_Size(seq);
1614 }
1615
1616 if (step != 1 && newlen != slicelen)
1617 {
1618 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001619 "attempt to assign sequence of size %zd "
1620 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001621 newlen, slicelen
1622 );
1623 return -1;
1624 }
1625
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001626 /* Resize before creating the recycle bin, to prevent refleaks. */
1627 if (newlen > slicelen) {
1628 if (element_resize(self, newlen - slicelen) < 0) {
1629 if (seq) {
1630 Py_DECREF(seq);
1631 }
1632 return -1;
1633 }
1634 }
1635
1636 if (slicelen > 0) {
1637 /* to avoid recursive calls to this method (via decref), move
1638 old items to the recycle bin here, and get rid of them when
1639 we're done modifying the element */
1640 recycle = PyList_New(slicelen);
1641 if (!recycle) {
1642 if (seq) {
1643 Py_DECREF(seq);
1644 }
1645 return -1;
1646 }
1647 for (cur = start, i = 0; i < slicelen;
1648 cur += step, i++)
1649 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1650 }
1651
1652 if (newlen < slicelen) {
1653 /* delete slice */
1654 for (i = stop; i < self->extra->length; i++)
1655 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1656 } else if (newlen > slicelen) {
1657 /* insert slice */
1658 for (i = self->extra->length-1; i >= stop; i--)
1659 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1660 }
1661
1662 /* replace the slice */
1663 for (cur = start, i = 0; i < newlen;
1664 cur += step, i++) {
1665 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1666 Py_INCREF(element);
1667 self->extra->children[cur] = element;
1668 }
1669
1670 self->extra->length += newlen - slicelen;
1671
1672 if (seq) {
1673 Py_DECREF(seq);
1674 }
1675
1676 /* discard the recycle bin, and everything in it */
1677 Py_XDECREF(recycle);
1678
1679 return 0;
1680 }
1681 else {
1682 PyErr_SetString(PyExc_TypeError,
1683 "element indices must be integers");
1684 return -1;
1685 }
1686}
1687
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001688static PyMethodDef element_methods[] = {
1689
Eli Bendersky0192ba32012-03-30 16:38:33 +03001690 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001691
Eli Benderskya8736902013-01-05 06:26:39 -08001692 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001693 {"set", (PyCFunction) element_set, METH_VARARGS},
1694
Eli Bendersky737b1732012-05-29 06:02:56 +03001695 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1696 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1697 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001698
1699 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001700 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001701 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1702 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1703
Eli Benderskya8736902013-01-05 06:26:39 -08001704 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001705 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001706 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001707
Eli Benderskya8736902013-01-05 06:26:39 -08001708 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001709 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1710
1711 {"items", (PyCFunction) element_items, METH_VARARGS},
1712 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1713
1714 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1715
1716 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1717 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001718 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001719 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1720 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001721
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001722 {NULL, NULL}
1723};
1724
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001725static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001726element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001727{
1728 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001729 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001730
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001731 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001732 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001733
Alexander Belopolskye239d232010-12-08 23:31:48 +00001734 if (name == NULL)
1735 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001736
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001737 /* handle common attributes first */
1738 if (strcmp(name, "tag") == 0) {
1739 res = self->tag;
1740 Py_INCREF(res);
1741 return res;
1742 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001743 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001744 Py_INCREF(res);
1745 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001746 }
1747
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001748 /* methods */
1749 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1750 if (res)
1751 return res;
1752
1753 /* less common attributes */
1754 if (strcmp(name, "tail") == 0) {
1755 PyErr_Clear();
1756 res = element_get_tail(self);
1757 } else if (strcmp(name, "attrib") == 0) {
1758 PyErr_Clear();
1759 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001760 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001761 res = element_get_attrib(self);
1762 }
1763
1764 if (!res)
1765 return NULL;
1766
1767 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001768 return res;
1769}
1770
Eli Benderskyb20df952012-05-20 06:33:29 +03001771static PyObject*
1772element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001773{
Eli Benderskyb20df952012-05-20 06:33:29 +03001774 char *name = "";
1775 if (PyUnicode_Check(nameobj))
1776 name = _PyUnicode_AsString(nameobj);
1777
1778 if (name == NULL)
1779 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001780
1781 if (strcmp(name, "tag") == 0) {
1782 Py_DECREF(self->tag);
1783 self->tag = value;
1784 Py_INCREF(self->tag);
1785 } else if (strcmp(name, "text") == 0) {
1786 Py_DECREF(JOIN_OBJ(self->text));
1787 self->text = value;
1788 Py_INCREF(self->text);
1789 } else if (strcmp(name, "tail") == 0) {
1790 Py_DECREF(JOIN_OBJ(self->tail));
1791 self->tail = value;
1792 Py_INCREF(self->tail);
1793 } else if (strcmp(name, "attrib") == 0) {
1794 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001795 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001796 Py_DECREF(self->extra->attrib);
1797 self->extra->attrib = value;
1798 Py_INCREF(self->extra->attrib);
1799 } else {
1800 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001801 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001802 }
1803
Eli Benderskyb20df952012-05-20 06:33:29 +03001804 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001805}
1806
1807static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001808 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001809 0, /* sq_concat */
1810 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001811 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001812 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001813 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001814 0,
1815};
1816
1817static PyMappingMethods element_as_mapping = {
1818 (lenfunc) element_length,
1819 (binaryfunc) element_subscr,
1820 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001821};
1822
Neal Norwitz227b5332006-03-22 09:28:35 +00001823static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001824 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001825 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001826 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001827 (destructor)element_dealloc, /* tp_dealloc */
1828 0, /* tp_print */
1829 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001830 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001831 0, /* tp_reserved */
1832 (reprfunc)element_repr, /* tp_repr */
1833 0, /* tp_as_number */
1834 &element_as_sequence, /* tp_as_sequence */
1835 &element_as_mapping, /* tp_as_mapping */
1836 0, /* tp_hash */
1837 0, /* tp_call */
1838 0, /* tp_str */
1839 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001840 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001841 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001842 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1843 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001844 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001845 (traverseproc)element_gc_traverse, /* tp_traverse */
1846 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001847 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001848 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001849 0, /* tp_iter */
1850 0, /* tp_iternext */
1851 element_methods, /* tp_methods */
1852 0, /* tp_members */
1853 0, /* tp_getset */
1854 0, /* tp_base */
1855 0, /* tp_dict */
1856 0, /* tp_descr_get */
1857 0, /* tp_descr_set */
1858 0, /* tp_dictoffset */
1859 (initproc)element_init, /* tp_init */
1860 PyType_GenericAlloc, /* tp_alloc */
1861 element_new, /* tp_new */
1862 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001863};
1864
Eli Bendersky64d11e62012-06-15 07:42:50 +03001865/******************************* Element iterator ****************************/
1866
1867/* ElementIterObject represents the iteration state over an XML element in
1868 * pre-order traversal. To keep track of which sub-element should be returned
1869 * next, a stack of parents is maintained. This is a standard stack-based
1870 * iterative pre-order traversal of a tree.
1871 * The stack is managed using a single-linked list starting at parent_stack.
1872 * Each stack node contains the saved parent to which we should return after
1873 * the current one is exhausted, and the next child to examine in that parent.
1874 */
1875typedef struct ParentLocator_t {
1876 ElementObject *parent;
1877 Py_ssize_t child_index;
1878 struct ParentLocator_t *next;
1879} ParentLocator;
1880
1881typedef struct {
1882 PyObject_HEAD
1883 ParentLocator *parent_stack;
1884 ElementObject *root_element;
1885 PyObject *sought_tag;
1886 int root_done;
1887 int gettext;
1888} ElementIterObject;
1889
1890
1891static void
1892elementiter_dealloc(ElementIterObject *it)
1893{
1894 ParentLocator *p = it->parent_stack;
1895 while (p) {
1896 ParentLocator *temp = p;
1897 Py_XDECREF(p->parent);
1898 p = p->next;
1899 PyObject_Free(temp);
1900 }
1901
1902 Py_XDECREF(it->sought_tag);
1903 Py_XDECREF(it->root_element);
1904
1905 PyObject_GC_UnTrack(it);
1906 PyObject_GC_Del(it);
1907}
1908
1909static int
1910elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1911{
1912 ParentLocator *p = it->parent_stack;
1913 while (p) {
1914 Py_VISIT(p->parent);
1915 p = p->next;
1916 }
1917
1918 Py_VISIT(it->root_element);
1919 Py_VISIT(it->sought_tag);
1920 return 0;
1921}
1922
1923/* Helper function for elementiter_next. Add a new parent to the parent stack.
1924 */
1925static ParentLocator *
1926parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1927{
1928 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1929 if (new_node) {
1930 new_node->parent = parent;
1931 Py_INCREF(parent);
1932 new_node->child_index = 0;
1933 new_node->next = stack;
1934 }
1935 return new_node;
1936}
1937
1938static PyObject *
1939elementiter_next(ElementIterObject *it)
1940{
1941 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08001942 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03001943 * A short note on gettext: this function serves both the iter() and
1944 * itertext() methods to avoid code duplication. However, there are a few
1945 * small differences in the way these iterations work. Namely:
1946 * - itertext() only yields text from nodes that have it, and continues
1947 * iterating when a node doesn't have text (so it doesn't return any
1948 * node like iter())
1949 * - itertext() also has to handle tail, after finishing with all the
1950 * children of a node.
1951 */
Eli Bendersky113da642012-06-15 07:52:49 +03001952 ElementObject *cur_parent;
1953 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001954
1955 while (1) {
1956 /* Handle the case reached in the beginning and end of iteration, where
1957 * the parent stack is empty. The root_done flag gives us indication
1958 * whether we've just started iterating (so root_done is 0), in which
1959 * case the root is returned. If root_done is 1 and we're here, the
1960 * iterator is exhausted.
1961 */
1962 if (!it->parent_stack->parent) {
1963 if (it->root_done) {
1964 PyErr_SetNone(PyExc_StopIteration);
1965 return NULL;
1966 } else {
1967 it->parent_stack = parent_stack_push_new(it->parent_stack,
1968 it->root_element);
1969 if (!it->parent_stack) {
1970 PyErr_NoMemory();
1971 return NULL;
1972 }
1973
1974 it->root_done = 1;
1975 if (it->sought_tag == Py_None ||
1976 PyObject_RichCompareBool(it->root_element->tag,
1977 it->sought_tag, Py_EQ) == 1) {
1978 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08001979 PyObject *text = element_get_text(it->root_element);
1980 if (!text)
1981 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001982 if (PyObject_IsTrue(text)) {
1983 Py_INCREF(text);
1984 return text;
1985 }
1986 } else {
1987 Py_INCREF(it->root_element);
1988 return (PyObject *)it->root_element;
1989 }
1990 }
1991 }
1992 }
1993
1994 /* See if there are children left to traverse in the current parent. If
1995 * yes, visit the next child. If not, pop the stack and try again.
1996 */
Eli Bendersky113da642012-06-15 07:52:49 +03001997 cur_parent = it->parent_stack->parent;
1998 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001999 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2000 ElementObject *child = (ElementObject *)
2001 cur_parent->extra->children[child_index];
2002 it->parent_stack->child_index++;
2003 it->parent_stack = parent_stack_push_new(it->parent_stack,
2004 child);
2005 if (!it->parent_stack) {
2006 PyErr_NoMemory();
2007 return NULL;
2008 }
2009
2010 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002011 PyObject *text = element_get_text(child);
2012 if (!text)
2013 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002014 if (PyObject_IsTrue(text)) {
2015 Py_INCREF(text);
2016 return text;
2017 }
2018 } else if (it->sought_tag == Py_None ||
2019 PyObject_RichCompareBool(child->tag,
2020 it->sought_tag, Py_EQ) == 1) {
2021 Py_INCREF(child);
2022 return (PyObject *)child;
2023 }
2024 else
2025 continue;
2026 }
2027 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002028 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002029 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002030 if (it->gettext) {
2031 tail = element_get_tail(cur_parent);
2032 if (!tail)
2033 return NULL;
2034 }
2035 else
2036 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002037 Py_XDECREF(it->parent_stack->parent);
2038 PyObject_Free(it->parent_stack);
2039 it->parent_stack = next;
2040
2041 /* Note that extra condition on it->parent_stack->parent here;
2042 * this is because itertext() is supposed to only return *inner*
2043 * text, not text following the element it began iteration with.
2044 */
2045 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2046 Py_INCREF(tail);
2047 return tail;
2048 }
2049 }
2050 }
2051
2052 return NULL;
2053}
2054
2055
2056static PyTypeObject ElementIter_Type = {
2057 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002058 /* Using the module's name since the pure-Python implementation does not
2059 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002060 "_elementtree._element_iterator", /* tp_name */
2061 sizeof(ElementIterObject), /* tp_basicsize */
2062 0, /* tp_itemsize */
2063 /* methods */
2064 (destructor)elementiter_dealloc, /* tp_dealloc */
2065 0, /* tp_print */
2066 0, /* tp_getattr */
2067 0, /* tp_setattr */
2068 0, /* tp_reserved */
2069 0, /* tp_repr */
2070 0, /* tp_as_number */
2071 0, /* tp_as_sequence */
2072 0, /* tp_as_mapping */
2073 0, /* tp_hash */
2074 0, /* tp_call */
2075 0, /* tp_str */
2076 0, /* tp_getattro */
2077 0, /* tp_setattro */
2078 0, /* tp_as_buffer */
2079 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2080 0, /* tp_doc */
2081 (traverseproc)elementiter_traverse, /* tp_traverse */
2082 0, /* tp_clear */
2083 0, /* tp_richcompare */
2084 0, /* tp_weaklistoffset */
2085 PyObject_SelfIter, /* tp_iter */
2086 (iternextfunc)elementiter_next, /* tp_iternext */
2087 0, /* tp_methods */
2088 0, /* tp_members */
2089 0, /* tp_getset */
2090 0, /* tp_base */
2091 0, /* tp_dict */
2092 0, /* tp_descr_get */
2093 0, /* tp_descr_set */
2094 0, /* tp_dictoffset */
2095 0, /* tp_init */
2096 0, /* tp_alloc */
2097 0, /* tp_new */
2098};
2099
2100
2101static PyObject *
2102create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2103{
2104 ElementIterObject *it;
2105 PyObject *star = NULL;
2106
2107 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2108 if (!it)
2109 return NULL;
2110 if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
2111 PyObject_GC_Del(it);
2112 return NULL;
2113 }
2114
2115 it->parent_stack->parent = NULL;
2116 it->parent_stack->child_index = 0;
2117 it->parent_stack->next = NULL;
2118
2119 if (PyUnicode_Check(tag))
2120 star = PyUnicode_FromString("*");
2121 else if (PyBytes_Check(tag))
2122 star = PyBytes_FromString("*");
2123
2124 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2125 tag = Py_None;
2126
2127 Py_XDECREF(star);
2128 it->sought_tag = tag;
2129 it->root_done = 0;
2130 it->gettext = gettext;
2131 it->root_element = self;
2132
2133 Py_INCREF(self);
2134 Py_INCREF(tag);
2135
2136 PyObject_GC_Track(it);
2137 return (PyObject *)it;
2138}
2139
2140
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002141/* ==================================================================== */
2142/* the tree builder type */
2143
2144typedef struct {
2145 PyObject_HEAD
2146
Eli Bendersky58d548d2012-05-29 15:45:16 +03002147 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002148
Antoine Pitrouee329312012-10-04 19:53:29 +02002149 PyObject *this; /* current node */
2150 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002151
Eli Bendersky58d548d2012-05-29 15:45:16 +03002152 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002153
Eli Bendersky58d548d2012-05-29 15:45:16 +03002154 PyObject *stack; /* element stack */
2155 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002156
Eli Bendersky48d358b2012-05-30 17:57:50 +03002157 PyObject *element_factory;
2158
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002159 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002160 PyObject *events; /* list of events, or NULL if not collecting */
2161 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2162 PyObject *end_event_obj;
2163 PyObject *start_ns_event_obj;
2164 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002165} TreeBuilderObject;
2166
Neal Norwitz227b5332006-03-22 09:28:35 +00002167static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002168
Christian Heimes90aa7642007-12-19 02:45:37 +00002169#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002170
2171/* -------------------------------------------------------------------- */
2172/* constructor and destructor */
2173
Eli Bendersky58d548d2012-05-29 15:45:16 +03002174static PyObject *
2175treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002176{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002177 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2178 if (t != NULL) {
2179 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002180
Eli Bendersky58d548d2012-05-29 15:45:16 +03002181 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002182 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002183 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002184 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002185
Eli Bendersky58d548d2012-05-29 15:45:16 +03002186 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002187 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002188 t->stack = PyList_New(20);
2189 if (!t->stack) {
2190 Py_DECREF(t->this);
2191 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002192 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002193 return NULL;
2194 }
2195 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002196
Eli Bendersky58d548d2012-05-29 15:45:16 +03002197 t->events = NULL;
2198 t->start_event_obj = t->end_event_obj = NULL;
2199 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2200 }
2201 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002202}
2203
Eli Bendersky58d548d2012-05-29 15:45:16 +03002204static int
2205treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002206{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002207 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002208 PyObject *element_factory = NULL;
2209 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002210 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002211
2212 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2213 &element_factory)) {
2214 return -1;
2215 }
2216
2217 if (element_factory) {
2218 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002219 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002220 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002221 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002222 }
2223
Eli Bendersky58d548d2012-05-29 15:45:16 +03002224 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002225}
2226
Eli Bendersky48d358b2012-05-30 17:57:50 +03002227static int
2228treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2229{
2230 Py_VISIT(self->root);
2231 Py_VISIT(self->this);
2232 Py_VISIT(self->last);
2233 Py_VISIT(self->data);
2234 Py_VISIT(self->stack);
2235 Py_VISIT(self->element_factory);
2236 return 0;
2237}
2238
2239static int
2240treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002241{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002242 Py_CLEAR(self->end_ns_event_obj);
2243 Py_CLEAR(self->start_ns_event_obj);
2244 Py_CLEAR(self->end_event_obj);
2245 Py_CLEAR(self->start_event_obj);
2246 Py_CLEAR(self->events);
2247 Py_CLEAR(self->stack);
2248 Py_CLEAR(self->data);
2249 Py_CLEAR(self->last);
2250 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002251 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002252 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002253 return 0;
2254}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002255
Eli Bendersky48d358b2012-05-30 17:57:50 +03002256static void
2257treebuilder_dealloc(TreeBuilderObject *self)
2258{
2259 PyObject_GC_UnTrack(self);
2260 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002261 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002262}
2263
2264/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002265/* helpers for handling of arbitrary element-like objects */
2266
2267static int
2268treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2269 PyObject **dest, _Py_Identifier *name)
2270{
2271 if (Element_CheckExact(element)) {
2272 Py_DECREF(JOIN_OBJ(*dest));
2273 *dest = JOIN_SET(data, PyList_CheckExact(data));
2274 return 0;
2275 }
2276 else {
2277 PyObject *joined = list_join(data);
2278 int r;
2279 if (joined == NULL)
2280 return -1;
2281 r = _PyObject_SetAttrId(element, name, joined);
2282 Py_DECREF(joined);
2283 return r;
2284 }
2285}
2286
2287/* These two functions steal a reference to data */
2288static int
2289treebuilder_set_element_text(PyObject *element, PyObject *data)
2290{
2291 _Py_IDENTIFIER(text);
2292 return treebuilder_set_element_text_or_tail(
2293 element, data, &((ElementObject *) element)->text, &PyId_text);
2294}
2295
2296static int
2297treebuilder_set_element_tail(PyObject *element, PyObject *data)
2298{
2299 _Py_IDENTIFIER(tail);
2300 return treebuilder_set_element_text_or_tail(
2301 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2302}
2303
2304static int
2305treebuilder_add_subelement(PyObject *element, PyObject *child)
2306{
2307 _Py_IDENTIFIER(append);
2308 if (Element_CheckExact(element)) {
2309 ElementObject *elem = (ElementObject *) element;
2310 return element_add_subelement(elem, child);
2311 }
2312 else {
2313 PyObject *res;
2314 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2315 if (res == NULL)
2316 return -1;
2317 Py_DECREF(res);
2318 return 0;
2319 }
2320}
2321
2322/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002323/* handlers */
2324
2325LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002326treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2327 PyObject* attrib)
2328{
2329 PyObject* node;
2330 PyObject* this;
2331
2332 if (self->data) {
2333 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002334 if (treebuilder_set_element_text(self->last, self->data))
2335 return NULL;
2336 }
2337 else {
2338 if (treebuilder_set_element_tail(self->last, self->data))
2339 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002340 }
2341 self->data = NULL;
2342 }
2343
Eli Bendersky48d358b2012-05-30 17:57:50 +03002344 if (self->element_factory) {
2345 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2346 } else {
2347 node = create_new_element(tag, attrib);
2348 }
2349 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002350 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002351 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002352
Antoine Pitrouee329312012-10-04 19:53:29 +02002353 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002354
2355 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002356 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002357 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002358 } else {
2359 if (self->root) {
2360 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002361 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002362 "multiple elements on top level"
2363 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002364 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002365 }
2366 Py_INCREF(node);
2367 self->root = node;
2368 }
2369
2370 if (self->index < PyList_GET_SIZE(self->stack)) {
2371 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002372 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002373 Py_INCREF(this);
2374 } else {
2375 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002376 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002377 }
2378 self->index++;
2379
2380 Py_DECREF(this);
2381 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002382 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002383
2384 Py_DECREF(self->last);
2385 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002386 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002387
2388 if (self->start_event_obj) {
2389 PyObject* res;
2390 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002391 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002392 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002393 PyList_Append(self->events, res);
2394 Py_DECREF(res);
2395 } else
2396 PyErr_Clear(); /* FIXME: propagate error */
2397 }
2398
2399 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002400
2401 error:
2402 Py_DECREF(node);
2403 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002404}
2405
2406LOCAL(PyObject*)
2407treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2408{
2409 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002410 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002411 /* ignore calls to data before the first call to start */
2412 Py_RETURN_NONE;
2413 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002414 /* store the first item as is */
2415 Py_INCREF(data); self->data = data;
2416 } else {
2417 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002418 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2419 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002420 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002421 /* expat often generates single character data sections; handle
2422 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002423 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2424 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002425 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002426 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002427 } else if (PyList_CheckExact(self->data)) {
2428 if (PyList_Append(self->data, data) < 0)
2429 return NULL;
2430 } else {
2431 PyObject* list = PyList_New(2);
2432 if (!list)
2433 return NULL;
2434 PyList_SET_ITEM(list, 0, self->data);
2435 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2436 self->data = list;
2437 }
2438 }
2439
2440 Py_RETURN_NONE;
2441}
2442
2443LOCAL(PyObject*)
2444treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2445{
2446 PyObject* item;
2447
2448 if (self->data) {
2449 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002450 if (treebuilder_set_element_text(self->last, self->data))
2451 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002452 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002453 if (treebuilder_set_element_tail(self->last, self->data))
2454 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002455 }
2456 self->data = NULL;
2457 }
2458
2459 if (self->index == 0) {
2460 PyErr_SetString(
2461 PyExc_IndexError,
2462 "pop from empty stack"
2463 );
2464 return NULL;
2465 }
2466
2467 self->index--;
2468
2469 item = PyList_GET_ITEM(self->stack, self->index);
2470 Py_INCREF(item);
2471
2472 Py_DECREF(self->last);
2473
Antoine Pitrouee329312012-10-04 19:53:29 +02002474 self->last = self->this;
2475 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002476
2477 if (self->end_event_obj) {
2478 PyObject* res;
2479 PyObject* action = self->end_event_obj;
2480 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002481 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002482 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002483 PyList_Append(self->events, res);
2484 Py_DECREF(res);
2485 } else
2486 PyErr_Clear(); /* FIXME: propagate error */
2487 }
2488
2489 Py_INCREF(self->last);
2490 return (PyObject*) self->last;
2491}
2492
2493LOCAL(void)
2494treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002495 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002496{
2497 PyObject* res;
2498 PyObject* action;
2499 PyObject* parcel;
2500
2501 if (!self->events)
2502 return;
2503
2504 if (start) {
2505 if (!self->start_ns_event_obj)
2506 return;
2507 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002508 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002509 if (!parcel)
2510 return;
2511 Py_INCREF(action);
2512 } else {
2513 if (!self->end_ns_event_obj)
2514 return;
2515 action = self->end_ns_event_obj;
2516 Py_INCREF(action);
2517 parcel = Py_None;
2518 Py_INCREF(parcel);
2519 }
2520
2521 res = PyTuple_New(2);
2522
2523 if (res) {
2524 PyTuple_SET_ITEM(res, 0, action);
2525 PyTuple_SET_ITEM(res, 1, parcel);
2526 PyList_Append(self->events, res);
2527 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002528 }
2529 else {
2530 Py_DECREF(action);
2531 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002532 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002533 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002534}
2535
2536/* -------------------------------------------------------------------- */
2537/* methods (in alphabetical order) */
2538
2539static PyObject*
2540treebuilder_data(TreeBuilderObject* self, PyObject* args)
2541{
2542 PyObject* data;
2543 if (!PyArg_ParseTuple(args, "O:data", &data))
2544 return NULL;
2545
2546 return treebuilder_handle_data(self, data);
2547}
2548
2549static PyObject*
2550treebuilder_end(TreeBuilderObject* self, PyObject* args)
2551{
2552 PyObject* tag;
2553 if (!PyArg_ParseTuple(args, "O:end", &tag))
2554 return NULL;
2555
2556 return treebuilder_handle_end(self, tag);
2557}
2558
2559LOCAL(PyObject*)
2560treebuilder_done(TreeBuilderObject* self)
2561{
2562 PyObject* res;
2563
2564 /* FIXME: check stack size? */
2565
2566 if (self->root)
2567 res = self->root;
2568 else
2569 res = Py_None;
2570
2571 Py_INCREF(res);
2572 return res;
2573}
2574
2575static PyObject*
2576treebuilder_close(TreeBuilderObject* self, PyObject* args)
2577{
2578 if (!PyArg_ParseTuple(args, ":close"))
2579 return NULL;
2580
2581 return treebuilder_done(self);
2582}
2583
2584static PyObject*
2585treebuilder_start(TreeBuilderObject* self, PyObject* args)
2586{
2587 PyObject* tag;
2588 PyObject* attrib = Py_None;
2589 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2590 return NULL;
2591
2592 return treebuilder_handle_start(self, tag, attrib);
2593}
2594
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002595static PyMethodDef treebuilder_methods[] = {
2596 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2597 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2598 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002599 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2600 {NULL, NULL}
2601};
2602
Neal Norwitz227b5332006-03-22 09:28:35 +00002603static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002604 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002605 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002606 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002607 (destructor)treebuilder_dealloc, /* tp_dealloc */
2608 0, /* tp_print */
2609 0, /* tp_getattr */
2610 0, /* tp_setattr */
2611 0, /* tp_reserved */
2612 0, /* tp_repr */
2613 0, /* tp_as_number */
2614 0, /* tp_as_sequence */
2615 0, /* tp_as_mapping */
2616 0, /* tp_hash */
2617 0, /* tp_call */
2618 0, /* tp_str */
2619 0, /* tp_getattro */
2620 0, /* tp_setattro */
2621 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002622 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2623 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002624 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002625 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2626 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002627 0, /* tp_richcompare */
2628 0, /* tp_weaklistoffset */
2629 0, /* tp_iter */
2630 0, /* tp_iternext */
2631 treebuilder_methods, /* tp_methods */
2632 0, /* tp_members */
2633 0, /* tp_getset */
2634 0, /* tp_base */
2635 0, /* tp_dict */
2636 0, /* tp_descr_get */
2637 0, /* tp_descr_set */
2638 0, /* tp_dictoffset */
2639 (initproc)treebuilder_init, /* tp_init */
2640 PyType_GenericAlloc, /* tp_alloc */
2641 treebuilder_new, /* tp_new */
2642 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002643};
2644
2645/* ==================================================================== */
2646/* the expat interface */
2647
2648#if defined(USE_EXPAT)
2649
2650#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002651#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002652static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002653#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002654
Eli Bendersky52467b12012-06-01 07:13:08 +03002655static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2656 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2657
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002658typedef struct {
2659 PyObject_HEAD
2660
2661 XML_Parser parser;
2662
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002663 PyObject *target;
2664 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002665
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002666 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002667
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002668 PyObject *handle_start;
2669 PyObject *handle_data;
2670 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002671
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002672 PyObject *handle_comment;
2673 PyObject *handle_pi;
2674 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002675
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002676 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002677
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002678} XMLParserObject;
2679
Neal Norwitz227b5332006-03-22 09:28:35 +00002680static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002681
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002682#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2683
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002684/* helpers */
2685
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002686LOCAL(PyObject*)
2687makeuniversal(XMLParserObject* self, const char* string)
2688{
2689 /* convert a UTF-8 tag/attribute name from the expat parser
2690 to a universal name string */
2691
Antoine Pitrouc1948842012-10-01 23:40:37 +02002692 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002693 PyObject* key;
2694 PyObject* value;
2695
2696 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002697 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002698 if (!key)
2699 return NULL;
2700
2701 value = PyDict_GetItem(self->names, key);
2702
2703 if (value) {
2704 Py_INCREF(value);
2705 } else {
2706 /* new name. convert to universal name, and decode as
2707 necessary */
2708
2709 PyObject* tag;
2710 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002711 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002712
2713 /* look for namespace separator */
2714 for (i = 0; i < size; i++)
2715 if (string[i] == '}')
2716 break;
2717 if (i != size) {
2718 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002719 tag = PyBytes_FromStringAndSize(NULL, size+1);
2720 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002721 p[0] = '{';
2722 memcpy(p+1, string, size);
2723 size++;
2724 } else {
2725 /* plain name; use key as tag */
2726 Py_INCREF(key);
2727 tag = key;
2728 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002729
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002730 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002731 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002732 value = PyUnicode_DecodeUTF8(p, size, "strict");
2733 Py_DECREF(tag);
2734 if (!value) {
2735 Py_DECREF(key);
2736 return NULL;
2737 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738
2739 /* add to names dictionary */
2740 if (PyDict_SetItem(self->names, key, value) < 0) {
2741 Py_DECREF(key);
2742 Py_DECREF(value);
2743 return NULL;
2744 }
2745 }
2746
2747 Py_DECREF(key);
2748 return value;
2749}
2750
Eli Bendersky5b77d812012-03-16 08:20:05 +02002751/* Set the ParseError exception with the given parameters.
2752 * If message is not NULL, it's used as the error string. Otherwise, the
2753 * message string is the default for the given error_code.
2754*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002755static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002756expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002757{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002758 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002759
Victor Stinner499dfcf2011-03-21 13:26:24 +01002760 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002761 message ? message : EXPAT(ErrorString)(error_code),
2762 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002763 if (errmsg == NULL)
2764 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002765
Victor Stinner499dfcf2011-03-21 13:26:24 +01002766 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2767 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002768 if (!error)
2769 return;
2770
Eli Bendersky5b77d812012-03-16 08:20:05 +02002771 /* Add code and position attributes */
2772 code = PyLong_FromLong((long)error_code);
2773 if (!code) {
2774 Py_DECREF(error);
2775 return;
2776 }
2777 if (PyObject_SetAttrString(error, "code", code) == -1) {
2778 Py_DECREF(error);
2779 Py_DECREF(code);
2780 return;
2781 }
2782 Py_DECREF(code);
2783
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002784 position = Py_BuildValue("(ii)", line, column);
2785 if (!position) {
2786 Py_DECREF(error);
2787 return;
2788 }
2789 if (PyObject_SetAttrString(error, "position", position) == -1) {
2790 Py_DECREF(error);
2791 Py_DECREF(position);
2792 return;
2793 }
2794 Py_DECREF(position);
2795
2796 PyErr_SetObject(elementtree_parseerror_obj, error);
2797 Py_DECREF(error);
2798}
2799
/* -------------------------------------------------------------------- */
/* handlers */
2802
2803static void
2804expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2805 int data_len)
2806{
2807 PyObject* key;
2808 PyObject* value;
2809 PyObject* res;
2810
2811 if (data_len < 2 || data_in[0] != '&')
2812 return;
2813
Neal Norwitz0269b912007-08-08 06:56:02 +00002814 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002815 if (!key)
2816 return;
2817
2818 value = PyDict_GetItem(self->entity, key);
2819
2820 if (value) {
2821 if (TreeBuilder_CheckExact(self->target))
2822 res = treebuilder_handle_data(
2823 (TreeBuilderObject*) self->target, value
2824 );
2825 else if (self->handle_data)
2826 res = PyObject_CallFunction(self->handle_data, "O", value);
2827 else
2828 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002829 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002830 } else if (!PyErr_Occurred()) {
2831 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002832 char message[128] = "undefined entity ";
2833 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002834 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002835 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002836 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002837 EXPAT(GetErrorColumnNumber)(self->parser),
2838 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002839 );
2840 }
2841
2842 Py_DECREF(key);
2843}
2844
2845static void
2846expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2847 const XML_Char **attrib_in)
2848{
2849 PyObject* res;
2850 PyObject* tag;
2851 PyObject* attrib;
2852 int ok;
2853
2854 /* tag name */
2855 tag = makeuniversal(self, tag_in);
2856 if (!tag)
2857 return; /* parser will look for errors */
2858
2859 /* attributes */
2860 if (attrib_in[0]) {
2861 attrib = PyDict_New();
2862 if (!attrib)
2863 return;
2864 while (attrib_in[0] && attrib_in[1]) {
2865 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002866 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002867 if (!key || !value) {
2868 Py_XDECREF(value);
2869 Py_XDECREF(key);
2870 Py_DECREF(attrib);
2871 return;
2872 }
2873 ok = PyDict_SetItem(attrib, key, value);
2874 Py_DECREF(value);
2875 Py_DECREF(key);
2876 if (ok < 0) {
2877 Py_DECREF(attrib);
2878 return;
2879 }
2880 attrib_in += 2;
2881 }
2882 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002883 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002884 attrib = PyDict_New();
2885 if (!attrib)
2886 return;
2887 }
2888
2889 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002890 /* shortcut */
2891 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2892 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002893 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002894 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002895 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002896 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002897 res = NULL;
2898
2899 Py_DECREF(tag);
2900 Py_DECREF(attrib);
2901
2902 Py_XDECREF(res);
2903}
2904
2905static void
2906expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2907 int data_len)
2908{
2909 PyObject* data;
2910 PyObject* res;
2911
Neal Norwitz0269b912007-08-08 06:56:02 +00002912 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002913 if (!data)
2914 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002915
2916 if (TreeBuilder_CheckExact(self->target))
2917 /* shortcut */
2918 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2919 else if (self->handle_data)
2920 res = PyObject_CallFunction(self->handle_data, "O", data);
2921 else
2922 res = NULL;
2923
2924 Py_DECREF(data);
2925
2926 Py_XDECREF(res);
2927}
2928
2929static void
2930expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2931{
2932 PyObject* tag;
2933 PyObject* res = NULL;
2934
2935 if (TreeBuilder_CheckExact(self->target))
2936 /* shortcut */
2937 /* the standard tree builder doesn't look at the end tag */
2938 res = treebuilder_handle_end(
2939 (TreeBuilderObject*) self->target, Py_None
2940 );
2941 else if (self->handle_end) {
2942 tag = makeuniversal(self, tag_in);
2943 if (tag) {
2944 res = PyObject_CallFunction(self->handle_end, "O", tag);
2945 Py_DECREF(tag);
2946 }
2947 }
2948
2949 Py_XDECREF(res);
2950}
2951
2952static void
2953expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2954 const XML_Char *uri)
2955{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002956 PyObject* sprefix = NULL;
2957 PyObject* suri = NULL;
2958
2959 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2960 if (!suri)
2961 return;
2962
2963 if (prefix)
2964 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2965 else
2966 sprefix = PyUnicode_FromString("");
2967 if (!sprefix) {
2968 Py_DECREF(suri);
2969 return;
2970 }
2971
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002972 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002973 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002975
2976 Py_DECREF(sprefix);
2977 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002978}
2979
2980static void
2981expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2982{
2983 treebuilder_handle_namespace(
2984 (TreeBuilderObject*) self->target, 0, NULL, NULL
2985 );
2986}
2987
2988static void
2989expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2990{
2991 PyObject* comment;
2992 PyObject* res;
2993
2994 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002995 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002996 if (comment) {
2997 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2998 Py_XDECREF(res);
2999 Py_DECREF(comment);
3000 }
3001 }
3002}
3003
Eli Bendersky45839902013-01-13 05:14:47 -08003004static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003005expat_start_doctype_handler(XMLParserObject *self,
3006 const XML_Char *doctype_name,
3007 const XML_Char *sysid,
3008 const XML_Char *pubid,
3009 int has_internal_subset)
3010{
3011 PyObject *self_pyobj = (PyObject *)self;
3012 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3013 PyObject *parser_doctype = NULL;
3014 PyObject *res = NULL;
3015
3016 doctype_name_obj = makeuniversal(self, doctype_name);
3017 if (!doctype_name_obj)
3018 return;
3019
3020 if (sysid) {
3021 sysid_obj = makeuniversal(self, sysid);
3022 if (!sysid_obj) {
3023 Py_DECREF(doctype_name_obj);
3024 return;
3025 }
3026 } else {
3027 Py_INCREF(Py_None);
3028 sysid_obj = Py_None;
3029 }
3030
3031 if (pubid) {
3032 pubid_obj = makeuniversal(self, pubid);
3033 if (!pubid_obj) {
3034 Py_DECREF(doctype_name_obj);
3035 Py_DECREF(sysid_obj);
3036 return;
3037 }
3038 } else {
3039 Py_INCREF(Py_None);
3040 pubid_obj = Py_None;
3041 }
3042
3043 /* If the target has a handler for doctype, call it. */
3044 if (self->handle_doctype) {
3045 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3046 doctype_name_obj, pubid_obj, sysid_obj);
3047 Py_CLEAR(res);
3048 }
3049
3050 /* Now see if the parser itself has a doctype method. If yes and it's
3051 * a subclass, call it but warn about deprecation. If it's not a subclass
3052 * (i.e. vanilla XMLParser), do nothing.
3053 */
3054 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3055 if (parser_doctype) {
3056 if (!XMLParser_CheckExact(self_pyobj)) {
3057 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3058 "This method of XMLParser is deprecated. Define"
3059 " doctype() method on the TreeBuilder target.",
3060 1) < 0) {
3061 goto clear;
3062 }
3063 res = PyObject_CallFunction(parser_doctype, "OOO",
3064 doctype_name_obj, pubid_obj, sysid_obj);
3065 Py_CLEAR(res);
3066 }
3067 }
3068
3069clear:
3070 Py_XDECREF(parser_doctype);
3071 Py_DECREF(doctype_name_obj);
3072 Py_DECREF(pubid_obj);
3073 Py_DECREF(sysid_obj);
3074}
3075
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076static void
3077expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3078 const XML_Char* data_in)
3079{
3080 PyObject* target;
3081 PyObject* data;
3082 PyObject* res;
3083
3084 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003085 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3086 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003087 if (target && data) {
3088 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3089 Py_XDECREF(res);
3090 Py_DECREF(data);
3091 Py_DECREF(target);
3092 } else {
3093 Py_XDECREF(data);
3094 Py_XDECREF(target);
3095 }
3096 }
3097}
3098
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003099static int
3100expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
3101 XML_Encoding *info)
3102{
3103 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003104 unsigned char s[256];
3105 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003106 void *data;
3107 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003108
3109 memset(info, 0, sizeof(XML_Encoding));
3110
3111 for (i = 0; i < 256; i++)
3112 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003113
Fredrik Lundhc3389992005-12-25 11:40:19 +00003114 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003115 if (!u)
3116 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003117 if (PyUnicode_READY(u))
3118 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003119
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003120 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003121 Py_DECREF(u);
3122 return XML_STATUS_ERROR;
3123 }
3124
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003125 kind = PyUnicode_KIND(u);
3126 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003127 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003128 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
3129 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
3130 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003131 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003132 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003133 }
3134
3135 Py_DECREF(u);
3136
3137 return XML_STATUS_OK;
3138}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003139
/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003141
Eli Bendersky52467b12012-06-01 07:13:08 +03003142static PyObject *
3143xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003144{
Eli Bendersky52467b12012-06-01 07:13:08 +03003145 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3146 if (self) {
3147 self->parser = NULL;
3148 self->target = self->entity = self->names = NULL;
3149 self->handle_start = self->handle_data = self->handle_end = NULL;
3150 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003151 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003152 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003153 return (PyObject *)self;
3154}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003155
Eli Bendersky52467b12012-06-01 07:13:08 +03003156static int
3157xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3158{
3159 XMLParserObject *self_xp = (XMLParserObject *)self;
3160 PyObject *target = NULL, *html = NULL;
3161 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003162 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003163
Eli Bendersky52467b12012-06-01 07:13:08 +03003164 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3165 &html, &target, &encoding)) {
3166 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003167 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003168
Eli Bendersky52467b12012-06-01 07:13:08 +03003169 self_xp->entity = PyDict_New();
3170 if (!self_xp->entity)
3171 return -1;
3172
3173 self_xp->names = PyDict_New();
3174 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003175 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003176 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003177 }
3178
Eli Bendersky52467b12012-06-01 07:13:08 +03003179 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3180 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003181 Py_CLEAR(self_xp->entity);
3182 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003183 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003184 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003185 }
3186
Eli Bendersky52467b12012-06-01 07:13:08 +03003187 if (target) {
3188 Py_INCREF(target);
3189 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003190 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003191 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003192 Py_CLEAR(self_xp->entity);
3193 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003194 EXPAT(ParserFree)(self_xp->parser);
3195 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003196 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003197 }
3198 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003199
Eli Bendersky52467b12012-06-01 07:13:08 +03003200 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3201 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3202 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3203 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3204 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3205 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003206 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003207
3208 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003209
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003210 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003211 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003212 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003213 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003214 (XML_StartElementHandler) expat_start_handler,
3215 (XML_EndElementHandler) expat_end_handler
3216 );
3217 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003218 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003219 (XML_DefaultHandler) expat_default_handler
3220 );
3221 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003222 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003223 (XML_CharacterDataHandler) expat_data_handler
3224 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003225 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003226 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003227 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003228 (XML_CommentHandler) expat_comment_handler
3229 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003230 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003231 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003232 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003233 (XML_ProcessingInstructionHandler) expat_pi_handler
3234 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003235 EXPAT(SetStartDoctypeDeclHandler)(
3236 self_xp->parser,
3237 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3238 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003239 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003240 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003241 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
3242 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003243
Eli Bendersky52467b12012-06-01 07:13:08 +03003244 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003245}
3246
Eli Bendersky52467b12012-06-01 07:13:08 +03003247static int
3248xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3249{
3250 Py_VISIT(self->handle_close);
3251 Py_VISIT(self->handle_pi);
3252 Py_VISIT(self->handle_comment);
3253 Py_VISIT(self->handle_end);
3254 Py_VISIT(self->handle_data);
3255 Py_VISIT(self->handle_start);
3256
3257 Py_VISIT(self->target);
3258 Py_VISIT(self->entity);
3259 Py_VISIT(self->names);
3260
3261 return 0;
3262}
3263
3264static int
3265xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003266{
3267 EXPAT(ParserFree)(self->parser);
3268
Antoine Pitrouc1948842012-10-01 23:40:37 +02003269 Py_CLEAR(self->handle_close);
3270 Py_CLEAR(self->handle_pi);
3271 Py_CLEAR(self->handle_comment);
3272 Py_CLEAR(self->handle_end);
3273 Py_CLEAR(self->handle_data);
3274 Py_CLEAR(self->handle_start);
3275 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276
Antoine Pitrouc1948842012-10-01 23:40:37 +02003277 Py_CLEAR(self->target);
3278 Py_CLEAR(self->entity);
3279 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003280
Eli Bendersky52467b12012-06-01 07:13:08 +03003281 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282}
3283
Eli Bendersky52467b12012-06-01 07:13:08 +03003284static void
3285xmlparser_dealloc(XMLParserObject* self)
3286{
3287 PyObject_GC_UnTrack(self);
3288 xmlparser_gc_clear(self);
3289 Py_TYPE(self)->tp_free((PyObject *)self);
3290}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003291
3292LOCAL(PyObject*)
3293expat_parse(XMLParserObject* self, char* data, int data_len, int final)
3294{
3295 int ok;
3296
3297 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3298
3299 if (PyErr_Occurred())
3300 return NULL;
3301
3302 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003303 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003304 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003305 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003306 EXPAT(GetErrorColumnNumber)(self->parser),
3307 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003308 );
3309 return NULL;
3310 }
3311
3312 Py_RETURN_NONE;
3313}
3314
3315static PyObject*
3316xmlparser_close(XMLParserObject* self, PyObject* args)
3317{
3318 /* end feeding data to parser */
3319
3320 PyObject* res;
3321 if (!PyArg_ParseTuple(args, ":close"))
3322 return NULL;
3323
3324 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003325 if (!res)
3326 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003327
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003328 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003329 Py_DECREF(res);
3330 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003331 } if (self->handle_close) {
3332 Py_DECREF(res);
3333 return PyObject_CallFunction(self->handle_close, "");
3334 } else
3335 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003336}
3337
3338static PyObject*
3339xmlparser_feed(XMLParserObject* self, PyObject* args)
3340{
3341 /* feed data to parser */
3342
3343 char* data;
3344 int data_len;
3345 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
3346 return NULL;
3347
3348 return expat_parse(self, data, data_len, 0);
3349}
3350
3351static PyObject*
3352xmlparser_parse(XMLParserObject* self, PyObject* args)
3353{
3354 /* (internal) parse until end of input stream */
3355
3356 PyObject* reader;
3357 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003358 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003359 PyObject* res;
3360
3361 PyObject* fileobj;
3362 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3363 return NULL;
3364
3365 reader = PyObject_GetAttrString(fileobj, "read");
3366 if (!reader)
3367 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003368
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003369 /* read from open file object */
3370 for (;;) {
3371
3372 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3373
3374 if (!buffer) {
3375 /* read failed (e.g. due to KeyboardInterrupt) */
3376 Py_DECREF(reader);
3377 return NULL;
3378 }
3379
Eli Benderskyf996e772012-03-16 05:53:30 +02003380 if (PyUnicode_CheckExact(buffer)) {
3381 /* A unicode object is encoded into bytes using UTF-8 */
3382 if (PyUnicode_GET_SIZE(buffer) == 0) {
3383 Py_DECREF(buffer);
3384 break;
3385 }
3386 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003387 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003388 if (!temp) {
3389 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003390 Py_DECREF(reader);
3391 return NULL;
3392 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003393 buffer = temp;
3394 }
3395 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003396 Py_DECREF(buffer);
3397 break;
3398 }
3399
3400 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003401 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003402 );
3403
3404 Py_DECREF(buffer);
3405
3406 if (!res) {
3407 Py_DECREF(reader);
3408 return NULL;
3409 }
3410 Py_DECREF(res);
3411
3412 }
3413
3414 Py_DECREF(reader);
3415
3416 res = expat_parse(self, "", 0, 1);
3417
3418 if (res && TreeBuilder_CheckExact(self->target)) {
3419 Py_DECREF(res);
3420 return treebuilder_done((TreeBuilderObject*) self->target);
3421 }
3422
3423 return res;
3424}
3425
3426static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003427xmlparser_doctype(XMLParserObject *self, PyObject *args)
3428{
3429 Py_RETURN_NONE;
3430}
3431
3432static PyObject*
3433xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003434{
3435 /* activate element event reporting */
3436
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003437 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003438 TreeBuilderObject* target;
3439
3440 PyObject* events; /* event collector */
3441 PyObject* event_set = Py_None;
3442 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
3443 &event_set))
3444 return NULL;
3445
3446 if (!TreeBuilder_CheckExact(self->target)) {
3447 PyErr_SetString(
3448 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003449 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003450 "targets"
3451 );
3452 return NULL;
3453 }
3454
3455 target = (TreeBuilderObject*) self->target;
3456
3457 Py_INCREF(events);
3458 Py_XDECREF(target->events);
3459 target->events = events;
3460
3461 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003462 Py_CLEAR(target->start_event_obj);
3463 Py_CLEAR(target->end_event_obj);
3464 Py_CLEAR(target->start_ns_event_obj);
3465 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003466
3467 if (event_set == Py_None) {
3468 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003469 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003470 Py_RETURN_NONE;
3471 }
3472
3473 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3474 goto error;
3475
3476 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3477 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3478 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003479 if (PyUnicode_Check(item)) {
3480 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003481 if (event == NULL)
3482 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003483 } else if (PyBytes_Check(item))
3484 event = PyBytes_AS_STRING(item);
3485 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003486 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003487 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003488 if (strcmp(event, "start") == 0) {
3489 Py_INCREF(item);
3490 target->start_event_obj = item;
3491 } else if (strcmp(event, "end") == 0) {
3492 Py_INCREF(item);
3493 Py_XDECREF(target->end_event_obj);
3494 target->end_event_obj = item;
3495 } else if (strcmp(event, "start-ns") == 0) {
3496 Py_INCREF(item);
3497 Py_XDECREF(target->start_ns_event_obj);
3498 target->start_ns_event_obj = item;
3499 EXPAT(SetNamespaceDeclHandler)(
3500 self->parser,
3501 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3502 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3503 );
3504 } else if (strcmp(event, "end-ns") == 0) {
3505 Py_INCREF(item);
3506 Py_XDECREF(target->end_ns_event_obj);
3507 target->end_ns_event_obj = item;
3508 EXPAT(SetNamespaceDeclHandler)(
3509 self->parser,
3510 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3511 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3512 );
3513 } else {
3514 PyErr_Format(
3515 PyExc_ValueError,
3516 "unknown event '%s'", event
3517 );
3518 return NULL;
3519 }
3520 }
3521
3522 Py_RETURN_NONE;
3523
3524 error:
3525 PyErr_SetString(
3526 PyExc_TypeError,
3527 "invalid event tuple"
3528 );
3529 return NULL;
3530}
3531
/* Method table of the XMLParser type.  Every entry uses METH_VARARGS,
   i.e. the functions receive their arguments as one positional tuple. */
static PyMethodDef xmlparser_methods[] = {
    {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
    {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
    {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
    {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
    {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
    {NULL, NULL}    /* sentinel */
};
3540
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003541static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003542xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003543{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003544 if (PyUnicode_Check(nameobj)) {
3545 PyObject* res;
3546 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3547 res = self->entity;
3548 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3549 res = self->target;
3550 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3551 return PyUnicode_FromFormat(
3552 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003553 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003554 }
3555 else
3556 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003557
Alexander Belopolskye239d232010-12-08 23:31:48 +00003558 Py_INCREF(res);
3559 return res;
3560 }
3561 generic:
3562 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003563}
3564
Neal Norwitz227b5332006-03-22 09:28:35 +00003565static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003566 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003567 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003568 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003569 (destructor)xmlparser_dealloc, /* tp_dealloc */
3570 0, /* tp_print */
3571 0, /* tp_getattr */
3572 0, /* tp_setattr */
3573 0, /* tp_reserved */
3574 0, /* tp_repr */
3575 0, /* tp_as_number */
3576 0, /* tp_as_sequence */
3577 0, /* tp_as_mapping */
3578 0, /* tp_hash */
3579 0, /* tp_call */
3580 0, /* tp_str */
3581 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3582 0, /* tp_setattro */
3583 0, /* tp_as_buffer */
3584 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3585 /* tp_flags */
3586 0, /* tp_doc */
3587 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3588 (inquiry)xmlparser_gc_clear, /* tp_clear */
3589 0, /* tp_richcompare */
3590 0, /* tp_weaklistoffset */
3591 0, /* tp_iter */
3592 0, /* tp_iternext */
3593 xmlparser_methods, /* tp_methods */
3594 0, /* tp_members */
3595 0, /* tp_getset */
3596 0, /* tp_base */
3597 0, /* tp_dict */
3598 0, /* tp_descr_get */
3599 0, /* tp_descr_set */
3600 0, /* tp_dictoffset */
3601 (initproc)xmlparser_init, /* tp_init */
3602 PyType_GenericAlloc, /* tp_alloc */
3603 xmlparser_new, /* tp_new */
3604 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003605};
3606
3607#endif
3608
3609/* ==================================================================== */
3610/* python module interface */
3611
3612static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003613 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003614 {NULL, NULL}
3615};
3616
Martin v. Löwis1a214512008-06-11 05:26:20 +00003617
3618static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003619 PyModuleDef_HEAD_INIT,
3620 "_elementtree",
3621 NULL,
3622 -1,
3623 _functions,
3624 NULL,
3625 NULL,
3626 NULL,
3627 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003628};
3629
Neal Norwitzf6657e62006-12-28 04:47:50 +00003630PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003631PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003632{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003633 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003634
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003635 /* Initialize object types */
3636 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003637 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003638 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003639 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003640#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003641 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003642 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003643#endif
3644
Martin v. Löwis1a214512008-06-11 05:26:20 +00003645 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003646 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003647 return NULL;
3648
Eli Bendersky828efde2012-04-05 05:40:58 +03003649 if (!(temp = PyImport_ImportModule("copy")))
3650 return NULL;
3651 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3652 Py_XDECREF(temp);
3653
3654 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3655 return NULL;
3656
Eli Bendersky20d41742012-06-01 09:48:37 +03003657 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003658 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3659 if (expat_capi) {
3660 /* check that it's usable */
3661 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3662 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3663 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3664 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003665 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003666 PyErr_SetString(PyExc_ImportError,
3667 "pyexpat version is incompatible");
3668 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003669 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003670 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003671 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003672 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003673
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003674 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003675 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003676 );
3677 Py_INCREF(elementtree_parseerror_obj);
3678 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3679
Eli Bendersky092af1f2012-03-04 07:14:03 +02003680 Py_INCREF((PyObject *)&Element_Type);
3681 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3682
Eli Bendersky58d548d2012-05-29 15:45:16 +03003683 Py_INCREF((PyObject *)&TreeBuilder_Type);
3684 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3685
Eli Bendersky52467b12012-06-01 07:13:08 +03003686#if defined(USE_EXPAT)
3687 Py_INCREF((PyObject *)&XMLParser_Type);
3688 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3689#endif
3690
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003691 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003692}