blob: 7e01352fa766f5f5891ed66bc27533dd6829f2f2 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Leave defined to include the expat-based XMLParser type */
#define USE_EXPAT

/* Number of child slots embedded directly in the element's extra
   structure; an element can hold this many children without extra
   memory allocations. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
74
75/* -------------------------------------------------------------------- */
76
/* Optional allocation tracing.  With the "#if 0" branch enabled, ALLOC and
   RELEASE keep a running byte count in `memory` and print it together with
   the given comment; in the default branch both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
87
/* compiler tweaks: LOCAL(type) introduces a file-private function that
   returns `type`.  Under MSVC the function is additionally marked
   __inline and __fastcall. */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
94
/* Macros used to store a 'join' flag in the lowest bit of a string object
   pointer.  Every use of text and tail as object pointers must go through
   JOIN_OBJ; see the comments in the ElementObject definition for more info.

   JOIN_OBJ(p)       - the pointer with the flag bit masked off
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - the pointer with its flag bit replaced by `flag` */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000102
Eli Benderskydd3661e2013-09-13 06:24:25 -0700103/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
104 * reference since this function sets it to NULL.
105*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +0200106static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -0700107{
108 if (*p) {
109 PyObject *tmp = JOIN_OBJ(*p);
110 *p = NULL;
111 Py_DECREF(tmp);
112 }
113}
114
Ronald Oussoren138d0802013-07-19 11:11:25 +0200115/* Types defined by this extension */
116static PyTypeObject Element_Type;
117static PyTypeObject ElementIter_Type;
118static PyTypeObject TreeBuilder_Type;
119static PyTypeObject XMLParser_Type;
120
121
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000122/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000123static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000124static PyObject* elementtree_deepcopy_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000125static PyObject* elementpath_obj;
126
127/* helpers */
128
129LOCAL(PyObject*)
130deepcopy(PyObject* object, PyObject* memo)
131{
132 /* do a deep copy of the given object */
133
134 PyObject* args;
135 PyObject* result;
136
137 if (!elementtree_deepcopy_obj) {
138 PyErr_SetString(
139 PyExc_RuntimeError,
140 "deepcopy helper not found"
141 );
142 return NULL;
143 }
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000146 if (!args)
147 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
153LOCAL(PyObject*)
154list_join(PyObject* list)
155{
156 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000158 PyObject* result;
159
Antoine Pitrouc1948842012-10-01 23:40:37 +0200160 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000161 if (!joiner)
162 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200163 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200165 if (result)
166 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000167 return result;
168}
169
Eli Bendersky48d358b2012-05-30 17:57:50 +0300170/* Is the given object an empty dictionary?
171*/
172static int
173is_empty_dict(PyObject *obj)
174{
175 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
176}
177
178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200180/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181
182typedef struct {
183
184 /* attributes (a dictionary object), or None if no attributes */
185 PyObject* attrib;
186
187 /* child elements */
188 int length; /* actual number of items */
189 int allocated; /* allocated items */
190
191 /* this either points to _children or to a malloced buffer */
192 PyObject* *children;
193
194 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100195
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000196} ElementObjectExtra;
197
198typedef struct {
199 PyObject_HEAD
200
201 /* element tag (a string). */
202 PyObject* tag;
203
204 /* text before first child. note that this is a tagged pointer;
205 use JOIN_OBJ to get the object pointer. the join flag is used
206 to distinguish lists created by the tree builder from lists
207 assigned to the attribute by application code; the former
208 should be joined before being returned to the user, the latter
209 should be left intact. */
210 PyObject* text;
211
212 /* text after this element, in parent. note that this is a tagged
213 pointer; use JOIN_OBJ to get the object pointer. */
214 PyObject* tail;
215
216 ElementObjectExtra* extra;
217
Eli Benderskyebf37a22012-04-03 22:02:37 +0300218 PyObject *weakreflist; /* For tp_weaklistoffset */
219
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000220} ElementObject;
221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222
Christian Heimes90aa7642007-12-19 02:45:37 +0000223#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
225/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200226/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227
228LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
231 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
232 if (!self->extra)
233 return -1;
234
235 if (!attrib)
236 attrib = Py_None;
237
238 Py_INCREF(attrib);
239 self->extra->attrib = attrib;
240
241 self->extra->length = 0;
242 self->extra->allocated = STATIC_CHILDREN;
243 self->extra->children = self->extra->_children;
244
245 return 0;
246}
247
248LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200249dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000250{
Eli Bendersky08b85292012-04-04 15:55:07 +0300251 ElementObjectExtra *myextra;
252 int i;
253
Eli Benderskyebf37a22012-04-03 22:02:37 +0300254 if (!self->extra)
255 return;
256
257 /* Avoid DECREFs calling into this code again (cycles, etc.)
258 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300259 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300260 self->extra = NULL;
261
262 Py_DECREF(myextra->attrib);
263
Eli Benderskyebf37a22012-04-03 22:02:37 +0300264 for (i = 0; i < myextra->length; i++)
265 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266
Eli Benderskyebf37a22012-04-03 22:02:37 +0300267 if (myextra->children != myextra->_children)
268 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000269
Eli Benderskyebf37a22012-04-03 22:02:37 +0300270 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271}
272
Eli Bendersky092af1f2012-03-04 07:14:03 +0200273/* Convenience internal function to create new Element objects with the given
274 * tag and attributes.
275*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000276LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000278{
279 ElementObject* self;
280
Eli Bendersky0192ba32012-03-30 16:38:33 +0300281 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282 if (self == NULL)
283 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000284 self->extra = NULL;
285
Eli Bendersky48d358b2012-05-30 17:57:50 +0300286 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200287 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000288 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000289 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000290 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000291 }
292
293 Py_INCREF(tag);
294 self->tag = tag;
295
296 Py_INCREF(Py_None);
297 self->text = Py_None;
298
299 Py_INCREF(Py_None);
300 self->tail = Py_None;
301
Eli Benderskyebf37a22012-04-03 22:02:37 +0300302 self->weakreflist = NULL;
303
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000304 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300305 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000306 return (PyObject*) self;
307}
308
Eli Bendersky092af1f2012-03-04 07:14:03 +0200309static PyObject *
310element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
311{
312 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
313 if (e != NULL) {
314 Py_INCREF(Py_None);
315 e->tag = Py_None;
316
317 Py_INCREF(Py_None);
318 e->text = Py_None;
319
320 Py_INCREF(Py_None);
321 e->tail = Py_None;
322
323 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300324 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200325 }
326 return (PyObject *)e;
327}
328
Eli Bendersky737b1732012-05-29 06:02:56 +0300329/* Helper function for extracting the attrib dictionary from a keywords dict.
330 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800331 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300332 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700333 *
334 * Return a dictionary with the content of kwds merged into the content of
335 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 */
337static PyObject*
338get_attrib_from_keywords(PyObject *kwds)
339{
340 PyObject *attrib_str = PyUnicode_FromString("attrib");
341 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
342
343 if (attrib) {
344 /* If attrib was found in kwds, copy its value and remove it from
345 * kwds
346 */
347 if (!PyDict_Check(attrib)) {
348 Py_DECREF(attrib_str);
349 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
350 Py_TYPE(attrib)->tp_name);
351 return NULL;
352 }
353 attrib = PyDict_Copy(attrib);
354 PyDict_DelItem(kwds, attrib_str);
355 } else {
356 attrib = PyDict_New();
357 }
358
359 Py_DECREF(attrib_str);
360
361 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200362 if (PyDict_Update(attrib, kwds) < 0)
363 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300364 return attrib;
365}
366
Eli Bendersky092af1f2012-03-04 07:14:03 +0200367static int
368element_init(PyObject *self, PyObject *args, PyObject *kwds)
369{
370 PyObject *tag;
371 PyObject *tmp;
372 PyObject *attrib = NULL;
373 ElementObject *self_elem;
374
375 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
376 return -1;
377
Eli Bendersky737b1732012-05-29 06:02:56 +0300378 if (attrib) {
379 /* attrib passed as positional arg */
380 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200381 if (!attrib)
382 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300383 if (kwds) {
384 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200385 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300386 return -1;
387 }
388 }
389 } else if (kwds) {
390 /* have keywords args */
391 attrib = get_attrib_from_keywords(kwds);
392 if (!attrib)
393 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200394 }
395
396 self_elem = (ElementObject *)self;
397
Antoine Pitrouc1948842012-10-01 23:40:37 +0200398 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200400 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401 return -1;
402 }
403 }
404
Eli Bendersky48d358b2012-05-30 17:57:50 +0300405 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200406 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200407
408 /* Replace the objects already pointed to by tag, text and tail. */
409 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200411 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200412 Py_DECREF(tmp);
413
414 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200415 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200416 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200417 Py_DECREF(JOIN_OBJ(tmp));
418
419 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200420 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200421 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200422 Py_DECREF(JOIN_OBJ(tmp));
423
424 return 0;
425}
426
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000427LOCAL(int)
428element_resize(ElementObject* self, int extra)
429{
430 int size;
431 PyObject* *children;
432
433 /* make sure self->children can hold the given number of extra
434 elements. set an exception and return -1 if allocation failed */
435
436 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200437 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000438
439 size = self->extra->length + extra;
440
441 if (size > self->extra->allocated) {
442 /* use Python 2.4's list growth strategy */
443 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000444 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100445 * which needs at least 4 bytes.
446 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000447 * be safe.
448 */
449 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000450 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000451 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100452 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000453 * false alarm always assume at least one child to be safe.
454 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000455 children = PyObject_Realloc(self->extra->children,
456 size * sizeof(PyObject*));
457 if (!children)
458 goto nomemory;
459 } else {
460 children = PyObject_Malloc(size * sizeof(PyObject*));
461 if (!children)
462 goto nomemory;
463 /* copy existing children from static area to malloc buffer */
464 memcpy(children, self->extra->children,
465 self->extra->length * sizeof(PyObject*));
466 }
467 self->extra->children = children;
468 self->extra->allocated = size;
469 }
470
471 return 0;
472
473 nomemory:
474 PyErr_NoMemory();
475 return -1;
476}
477
478LOCAL(int)
479element_add_subelement(ElementObject* self, PyObject* element)
480{
481 /* add a child element to a parent */
482
483 if (element_resize(self, 1) < 0)
484 return -1;
485
486 Py_INCREF(element);
487 self->extra->children[self->extra->length] = element;
488
489 self->extra->length++;
490
491 return 0;
492}
493
494LOCAL(PyObject*)
495element_get_attrib(ElementObject* self)
496{
497 /* return borrowed reference to attrib dictionary */
498 /* note: this function assumes that the extra section exists */
499
500 PyObject* res = self->extra->attrib;
501
502 if (res == Py_None) {
503 /* create missing dictionary */
504 res = PyDict_New();
505 if (!res)
506 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200507 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000508 self->extra->attrib = res;
509 }
510
511 return res;
512}
513
514LOCAL(PyObject*)
515element_get_text(ElementObject* self)
516{
517 /* return borrowed reference to text attribute */
518
519 PyObject* res = self->text;
520
521 if (JOIN_GET(res)) {
522 res = JOIN_OBJ(res);
523 if (PyList_CheckExact(res)) {
524 res = list_join(res);
525 if (!res)
526 return NULL;
527 self->text = res;
528 }
529 }
530
531 return res;
532}
533
534LOCAL(PyObject*)
535element_get_tail(ElementObject* self)
536{
537 /* return borrowed reference to text attribute */
538
539 PyObject* res = self->tail;
540
541 if (JOIN_GET(res)) {
542 res = JOIN_OBJ(res);
543 if (PyList_CheckExact(res)) {
544 res = list_join(res);
545 if (!res)
546 return NULL;
547 self->tail = res;
548 }
549 }
550
551 return res;
552}
553
554static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300555subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000556{
557 PyObject* elem;
558
559 ElementObject* parent;
560 PyObject* tag;
561 PyObject* attrib = NULL;
562 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
563 &Element_Type, &parent, &tag,
564 &PyDict_Type, &attrib))
565 return NULL;
566
Eli Bendersky737b1732012-05-29 06:02:56 +0300567 if (attrib) {
568 /* attrib passed as positional arg */
569 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000570 if (!attrib)
571 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300572 if (kwds) {
573 if (PyDict_Update(attrib, kwds) < 0) {
574 return NULL;
575 }
576 }
577 } else if (kwds) {
578 /* have keyword args */
579 attrib = get_attrib_from_keywords(kwds);
580 if (!attrib)
581 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000582 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300583 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000584 Py_INCREF(Py_None);
585 attrib = Py_None;
586 }
587
Eli Bendersky092af1f2012-03-04 07:14:03 +0200588 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000589
590 Py_DECREF(attrib);
591
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000592 if (element_add_subelement(parent, elem) < 0) {
593 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000594 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000595 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000596
597 return elem;
598}
599
Eli Bendersky0192ba32012-03-30 16:38:33 +0300600static int
601element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
602{
603 Py_VISIT(self->tag);
604 Py_VISIT(JOIN_OBJ(self->text));
605 Py_VISIT(JOIN_OBJ(self->tail));
606
607 if (self->extra) {
608 int i;
609 Py_VISIT(self->extra->attrib);
610
611 for (i = 0; i < self->extra->length; ++i)
612 Py_VISIT(self->extra->children[i]);
613 }
614 return 0;
615}
616
617static int
618element_gc_clear(ElementObject *self)
619{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700621 _clear_joined_ptr(&self->text);
622 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300623
624 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300625 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300626 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300627 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300628 return 0;
629}
630
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000631static void
632element_dealloc(ElementObject* self)
633{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300634 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300635
636 if (self->weakreflist != NULL)
637 PyObject_ClearWeakRefs((PyObject *) self);
638
Eli Bendersky0192ba32012-03-30 16:38:33 +0300639 /* element_gc_clear clears all references and deallocates extra
640 */
641 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000642
643 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200644 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000645}
646
647/* -------------------------------------------------------------------- */
648/* methods (in alphabetical order) */
649
650static PyObject*
651element_append(ElementObject* self, PyObject* args)
652{
653 PyObject* element;
654 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
655 return NULL;
656
657 if (element_add_subelement(self, element) < 0)
658 return NULL;
659
660 Py_RETURN_NONE;
661}
662
663static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300664element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665{
666 if (!PyArg_ParseTuple(args, ":clear"))
667 return NULL;
668
Eli Benderskyebf37a22012-04-03 22:02:37 +0300669 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000670
671 Py_INCREF(Py_None);
672 Py_DECREF(JOIN_OBJ(self->text));
673 self->text = Py_None;
674
675 Py_INCREF(Py_None);
676 Py_DECREF(JOIN_OBJ(self->tail));
677 self->tail = Py_None;
678
679 Py_RETURN_NONE;
680}
681
682static PyObject*
683element_copy(ElementObject* self, PyObject* args)
684{
685 int i;
686 ElementObject* element;
687
688 if (!PyArg_ParseTuple(args, ":__copy__"))
689 return NULL;
690
Eli Bendersky092af1f2012-03-04 07:14:03 +0200691 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000692 self->tag, (self->extra) ? self->extra->attrib : Py_None
693 );
694 if (!element)
695 return NULL;
696
697 Py_DECREF(JOIN_OBJ(element->text));
698 element->text = self->text;
699 Py_INCREF(JOIN_OBJ(element->text));
700
701 Py_DECREF(JOIN_OBJ(element->tail));
702 element->tail = self->tail;
703 Py_INCREF(JOIN_OBJ(element->tail));
704
705 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100706
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000707 if (element_resize(element, self->extra->length) < 0) {
708 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000709 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000710 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000711
712 for (i = 0; i < self->extra->length; i++) {
713 Py_INCREF(self->extra->children[i]);
714 element->extra->children[i] = self->extra->children[i];
715 }
716
717 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100718
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719 }
720
721 return (PyObject*) element;
722}
723
724static PyObject*
725element_deepcopy(ElementObject* self, PyObject* args)
726{
727 int i;
728 ElementObject* element;
729 PyObject* tag;
730 PyObject* attrib;
731 PyObject* text;
732 PyObject* tail;
733 PyObject* id;
734
735 PyObject* memo;
736 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
737 return NULL;
738
739 tag = deepcopy(self->tag, memo);
740 if (!tag)
741 return NULL;
742
743 if (self->extra) {
744 attrib = deepcopy(self->extra->attrib, memo);
745 if (!attrib) {
746 Py_DECREF(tag);
747 return NULL;
748 }
749 } else {
750 Py_INCREF(Py_None);
751 attrib = Py_None;
752 }
753
Eli Bendersky092af1f2012-03-04 07:14:03 +0200754 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755
756 Py_DECREF(tag);
757 Py_DECREF(attrib);
758
759 if (!element)
760 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100761
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762 text = deepcopy(JOIN_OBJ(self->text), memo);
763 if (!text)
764 goto error;
765 Py_DECREF(element->text);
766 element->text = JOIN_SET(text, JOIN_GET(self->text));
767
768 tail = deepcopy(JOIN_OBJ(self->tail), memo);
769 if (!tail)
770 goto error;
771 Py_DECREF(element->tail);
772 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
773
774 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100775
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000776 if (element_resize(element, self->extra->length) < 0)
777 goto error;
778
779 for (i = 0; i < self->extra->length; i++) {
780 PyObject* child = deepcopy(self->extra->children[i], memo);
781 if (!child) {
782 element->extra->length = i;
783 goto error;
784 }
785 element->extra->children[i] = child;
786 }
787
788 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100789
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000790 }
791
792 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200793 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000794 if (!id)
795 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000796
797 i = PyDict_SetItem(memo, id, (PyObject*) element);
798
799 Py_DECREF(id);
800
801 if (i < 0)
802 goto error;
803
804 return (PyObject*) element;
805
806 error:
807 Py_DECREF(element);
808 return NULL;
809}
810
Martin v. Löwisbce16662012-06-17 10:41:22 +0200811static PyObject*
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200812element_sizeof(PyObject* myself, PyObject* args)
Martin v. Löwisbce16662012-06-17 10:41:22 +0200813{
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200814 ElementObject *self = (ElementObject*)myself;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200815 Py_ssize_t result = sizeof(ElementObject);
816 if (self->extra) {
817 result += sizeof(ElementObjectExtra);
818 if (self->extra->children != self->extra->_children)
819 result += sizeof(PyObject*) * self->extra->allocated;
820 }
821 return PyLong_FromSsize_t(result);
822}
823
/* Keys of the state dictionary built by __getstate__. */
#define PICKLED_TAG       "tag"
#define PICKLED_CHILDREN  "_children"
#define PICKLED_ATTRIB    "attrib"
#define PICKLED_TAIL      "tail"
#define PICKLED_TEXT      "text"
830
831/* __getstate__ returns a fabricated instance dict as in the pure-Python
832 * Element implementation, for interoperability/interchangeability. This
833 * makes the pure-Python implementation details an API, but (a) there aren't
834 * any unnecessary structures there; and (b) it buys compatibility with 3.2
835 * pickles. See issue #16076.
836 */
837static PyObject *
838element_getstate(ElementObject *self)
839{
840 int i, noattrib;
841 PyObject *instancedict = NULL, *children;
842
843 /* Build a list of children. */
844 children = PyList_New(self->extra ? self->extra->length : 0);
845 if (!children)
846 return NULL;
847 for (i = 0; i < PyList_GET_SIZE(children); i++) {
848 PyObject *child = self->extra->children[i];
849 Py_INCREF(child);
850 PyList_SET_ITEM(children, i, child);
851 }
852
853 /* Construct the state object. */
854 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
855 if (noattrib)
856 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
857 PICKLED_TAG, self->tag,
858 PICKLED_CHILDREN, children,
859 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700860 PICKLED_TEXT, JOIN_OBJ(self->text),
861 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800862 else
863 instancedict = Py_BuildValue("{sOsOsOsOsO}",
864 PICKLED_TAG, self->tag,
865 PICKLED_CHILDREN, children,
866 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700867 PICKLED_TEXT, JOIN_OBJ(self->text),
868 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800869 if (instancedict) {
870 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800871 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800872 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800873 else {
874 for (i = 0; i < PyList_GET_SIZE(children); i++)
875 Py_DECREF(PyList_GET_ITEM(children, i));
876 Py_DECREF(children);
877
878 return NULL;
879 }
880}
881
882static PyObject *
883element_setstate_from_attributes(ElementObject *self,
884 PyObject *tag,
885 PyObject *attrib,
886 PyObject *text,
887 PyObject *tail,
888 PyObject *children)
889{
890 Py_ssize_t i, nchildren;
891
892 if (!tag) {
893 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
894 return NULL;
895 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800896
897 Py_CLEAR(self->tag);
898 self->tag = tag;
899 Py_INCREF(self->tag);
900
Eli Benderskydd3661e2013-09-13 06:24:25 -0700901 _clear_joined_ptr(&self->text);
902 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
903 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800904
Eli Benderskydd3661e2013-09-13 06:24:25 -0700905 _clear_joined_ptr(&self->tail);
906 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
907 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800908
909 /* Handle ATTRIB and CHILDREN. */
910 if (!children && !attrib)
911 Py_RETURN_NONE;
912
913 /* Compute 'nchildren'. */
914 if (children) {
915 if (!PyList_Check(children)) {
916 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
917 return NULL;
918 }
919 nchildren = PyList_Size(children);
920 }
921 else {
922 nchildren = 0;
923 }
924
925 /* Allocate 'extra'. */
926 if (element_resize(self, nchildren)) {
927 return NULL;
928 }
929 assert(self->extra && self->extra->allocated >= nchildren);
930
931 /* Copy children */
932 for (i = 0; i < nchildren; i++) {
933 self->extra->children[i] = PyList_GET_ITEM(children, i);
934 Py_INCREF(self->extra->children[i]);
935 }
936
937 self->extra->length = nchildren;
938 self->extra->allocated = nchildren;
939
940 /* Stash attrib. */
941 if (attrib) {
942 Py_CLEAR(self->extra->attrib);
943 self->extra->attrib = attrib;
944 Py_INCREF(attrib);
945 }
946
947 Py_RETURN_NONE;
948}
949
950/* __setstate__ for Element instance from the Python implementation.
951 * 'state' should be the instance dict.
952 */
953static PyObject *
954element_setstate_from_Python(ElementObject *self, PyObject *state)
955{
956 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
957 PICKLED_TAIL, PICKLED_CHILDREN, 0};
958 PyObject *args;
959 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800960 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800961
Eli Bendersky698bdb22013-01-10 06:01:06 -0800962 tag = attrib = text = tail = children = NULL;
963 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800964 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800965 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800966
967 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
968 &attrib, &text, &tail, &children))
969 retval = element_setstate_from_attributes(self, tag, attrib, text,
970 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800971 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800972 retval = NULL;
973
974 Py_DECREF(args);
975 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800976}
977
978static PyObject *
979element_setstate(ElementObject *self, PyObject *state)
980{
981 if (!PyDict_CheckExact(state)) {
982 PyErr_Format(PyExc_TypeError,
983 "Don't know how to unpickle \"%.200R\" as an Element",
984 state);
985 return NULL;
986 }
987 else
988 return element_setstate_from_Python(self, state);
989}
990
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000991LOCAL(int)
992checkpath(PyObject* tag)
993{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000994 Py_ssize_t i;
995 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000996
997 /* check if a tag contains an xpath character */
998
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000999#define PATHCHAR(ch) \
1000 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001001
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001002 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001003 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1004 void *data = PyUnicode_DATA(tag);
1005 unsigned int kind = PyUnicode_KIND(tag);
1006 for (i = 0; i < len; i++) {
1007 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1008 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001009 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001010 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001011 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001012 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001013 return 1;
1014 }
1015 return 0;
1016 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001017 if (PyBytes_Check(tag)) {
1018 char *p = PyBytes_AS_STRING(tag);
1019 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001020 if (p[i] == '{')
1021 check = 0;
1022 else if (p[i] == '}')
1023 check = 1;
1024 else if (check && PATHCHAR(p[i]))
1025 return 1;
1026 }
1027 return 0;
1028 }
1029
1030 return 1; /* unknown type; might be path expression */
1031}
1032
1033static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001034element_extend(ElementObject* self, PyObject* args)
1035{
1036 PyObject* seq;
1037 Py_ssize_t i, seqlen = 0;
1038
1039 PyObject* seq_in;
1040 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1041 return NULL;
1042
1043 seq = PySequence_Fast(seq_in, "");
1044 if (!seq) {
1045 PyErr_Format(
1046 PyExc_TypeError,
1047 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1048 );
1049 return NULL;
1050 }
1051
1052 seqlen = PySequence_Size(seq);
1053 for (i = 0; i < seqlen; i++) {
1054 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001055 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1056 Py_DECREF(seq);
1057 PyErr_Format(
1058 PyExc_TypeError,
1059 "expected an Element, not \"%.200s\"",
1060 Py_TYPE(element)->tp_name);
1061 return NULL;
1062 }
1063
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001064 if (element_add_subelement(self, element) < 0) {
1065 Py_DECREF(seq);
1066 return NULL;
1067 }
1068 }
1069
1070 Py_DECREF(seq);
1071
1072 Py_RETURN_NONE;
1073}
1074
1075static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001076element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001077{
1078 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001079 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001080 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001081 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001082
Eli Bendersky737b1732012-05-29 06:02:56 +03001083 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1084 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001085 return NULL;
1086
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001087 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001088 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001089 return _PyObject_CallMethodId(
1090 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001091 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001092 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001093
1094 if (!self->extra)
1095 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001096
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001097 for (i = 0; i < self->extra->length; i++) {
1098 PyObject* item = self->extra->children[i];
1099 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001100 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001101 Py_INCREF(item);
1102 return item;
1103 }
1104 }
1105
1106 Py_RETURN_NONE;
1107}
1108
1109static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001110element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001111{
1112 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001113 PyObject* tag;
1114 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001115 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001116 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001117 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001118
Eli Bendersky737b1732012-05-29 06:02:56 +03001119 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1120 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001121 return NULL;
1122
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001123 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001124 return _PyObject_CallMethodId(
1125 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001126 );
1127
1128 if (!self->extra) {
1129 Py_INCREF(default_value);
1130 return default_value;
1131 }
1132
1133 for (i = 0; i < self->extra->length; i++) {
1134 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +00001135 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
1136
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001137 PyObject* text = element_get_text(item);
1138 if (text == Py_None)
Eli Bendersky25771b32013-01-13 05:26:07 -08001139 return PyUnicode_New(0, 0);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001140 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001141 return text;
1142 }
1143 }
1144
1145 Py_INCREF(default_value);
1146 return default_value;
1147}
1148
1149static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001150element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001151{
1152 int i;
1153 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001154 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001155 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001156 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001157
Eli Bendersky737b1732012-05-29 06:02:56 +03001158 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1159 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001160 return NULL;
1161
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001162 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001163 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001164 return _PyObject_CallMethodId(
1165 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001166 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001167 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001168
1169 out = PyList_New(0);
1170 if (!out)
1171 return NULL;
1172
1173 if (!self->extra)
1174 return out;
1175
1176 for (i = 0; i < self->extra->length; i++) {
1177 PyObject* item = self->extra->children[i];
1178 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001179 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180 if (PyList_Append(out, item) < 0) {
1181 Py_DECREF(out);
1182 return NULL;
1183 }
1184 }
1185 }
1186
1187 return out;
1188}
1189
1190static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001191element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001192{
1193 PyObject* tag;
1194 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001195 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001196 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001197
Eli Bendersky737b1732012-05-29 06:02:56 +03001198 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1199 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001200 return NULL;
1201
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001202 return _PyObject_CallMethodId(
1203 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001204 );
1205}
1206
1207static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001208element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001209{
1210 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001211 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001212
1213 PyObject* key;
1214 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001215
1216 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1217 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001218 return NULL;
1219
1220 if (!self->extra || self->extra->attrib == Py_None)
1221 value = default_value;
1222 else {
1223 value = PyDict_GetItem(self->extra->attrib, key);
1224 if (!value)
1225 value = default_value;
1226 }
1227
1228 Py_INCREF(value);
1229 return value;
1230}
1231
1232static PyObject*
1233element_getchildren(ElementObject* self, PyObject* args)
1234{
1235 int i;
1236 PyObject* list;
1237
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001238 /* FIXME: report as deprecated? */
1239
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001240 if (!PyArg_ParseTuple(args, ":getchildren"))
1241 return NULL;
1242
1243 if (!self->extra)
1244 return PyList_New(0);
1245
1246 list = PyList_New(self->extra->length);
1247 if (!list)
1248 return NULL;
1249
1250 for (i = 0; i < self->extra->length; i++) {
1251 PyObject* item = self->extra->children[i];
1252 Py_INCREF(item);
1253 PyList_SET_ITEM(list, i, item);
1254 }
1255
1256 return list;
1257}
1258
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001259
Eli Bendersky64d11e62012-06-15 07:42:50 +03001260static PyObject *
1261create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1262
1263
1264static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001265element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001266{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001267 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001268 static char* kwlist[] = {"tag", 0};
1269
1270 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001271 return NULL;
1272
Eli Bendersky64d11e62012-06-15 07:42:50 +03001273 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001274}
1275
1276
1277static PyObject*
1278element_itertext(ElementObject* self, PyObject* args)
1279{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001280 if (!PyArg_ParseTuple(args, ":itertext"))
1281 return NULL;
1282
Eli Bendersky64d11e62012-06-15 07:42:50 +03001283 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001284}
1285
Eli Bendersky64d11e62012-06-15 07:42:50 +03001286
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001287static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001288element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001289{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001290 ElementObject* self = (ElementObject*) self_;
1291
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001292 if (!self->extra || index < 0 || index >= self->extra->length) {
1293 PyErr_SetString(
1294 PyExc_IndexError,
1295 "child index out of range"
1296 );
1297 return NULL;
1298 }
1299
1300 Py_INCREF(self->extra->children[index]);
1301 return self->extra->children[index];
1302}
1303
1304static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001305element_insert(ElementObject* self, PyObject* args)
1306{
1307 int i;
1308
1309 int index;
1310 PyObject* element;
1311 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1312 &Element_Type, &element))
1313 return NULL;
1314
1315 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001316 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001317
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001318 if (index < 0) {
1319 index += self->extra->length;
1320 if (index < 0)
1321 index = 0;
1322 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001323 if (index > self->extra->length)
1324 index = self->extra->length;
1325
1326 if (element_resize(self, 1) < 0)
1327 return NULL;
1328
1329 for (i = self->extra->length; i > index; i--)
1330 self->extra->children[i] = self->extra->children[i-1];
1331
1332 Py_INCREF(element);
1333 self->extra->children[index] = element;
1334
1335 self->extra->length++;
1336
1337 Py_RETURN_NONE;
1338}
1339
1340static PyObject*
1341element_items(ElementObject* self, PyObject* args)
1342{
1343 if (!PyArg_ParseTuple(args, ":items"))
1344 return NULL;
1345
1346 if (!self->extra || self->extra->attrib == Py_None)
1347 return PyList_New(0);
1348
1349 return PyDict_Items(self->extra->attrib);
1350}
1351
1352static PyObject*
1353element_keys(ElementObject* self, PyObject* args)
1354{
1355 if (!PyArg_ParseTuple(args, ":keys"))
1356 return NULL;
1357
1358 if (!self->extra || self->extra->attrib == Py_None)
1359 return PyList_New(0);
1360
1361 return PyDict_Keys(self->extra->attrib);
1362}
1363
Martin v. Löwis18e16552006-02-15 17:27:45 +00001364static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001365element_length(ElementObject* self)
1366{
1367 if (!self->extra)
1368 return 0;
1369
1370 return self->extra->length;
1371}
1372
1373static PyObject*
1374element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1375{
1376 PyObject* elem;
1377
1378 PyObject* tag;
1379 PyObject* attrib;
1380 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1381 return NULL;
1382
1383 attrib = PyDict_Copy(attrib);
1384 if (!attrib)
1385 return NULL;
1386
Eli Bendersky092af1f2012-03-04 07:14:03 +02001387 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001388
1389 Py_DECREF(attrib);
1390
1391 return elem;
1392}
1393
1394static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001395element_remove(ElementObject* self, PyObject* args)
1396{
1397 int i;
1398
1399 PyObject* element;
1400 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1401 return NULL;
1402
1403 if (!self->extra) {
1404 /* element has no children, so raise exception */
1405 PyErr_SetString(
1406 PyExc_ValueError,
1407 "list.remove(x): x not in list"
1408 );
1409 return NULL;
1410 }
1411
1412 for (i = 0; i < self->extra->length; i++) {
1413 if (self->extra->children[i] == element)
1414 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001415 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001416 break;
1417 }
1418
1419 if (i == self->extra->length) {
1420 /* element is not in children, so raise exception */
1421 PyErr_SetString(
1422 PyExc_ValueError,
1423 "list.remove(x): x not in list"
1424 );
1425 return NULL;
1426 }
1427
1428 Py_DECREF(self->extra->children[i]);
1429
1430 self->extra->length--;
1431
1432 for (; i < self->extra->length; i++)
1433 self->extra->children[i] = self->extra->children[i+1];
1434
1435 Py_RETURN_NONE;
1436}
1437
1438static PyObject*
1439element_repr(ElementObject* self)
1440{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001441 if (self->tag)
1442 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1443 else
1444 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001445}
1446
1447static PyObject*
1448element_set(ElementObject* self, PyObject* args)
1449{
1450 PyObject* attrib;
1451
1452 PyObject* key;
1453 PyObject* value;
1454 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1455 return NULL;
1456
1457 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001458 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001459
1460 attrib = element_get_attrib(self);
1461 if (!attrib)
1462 return NULL;
1463
1464 if (PyDict_SetItem(attrib, key, value) < 0)
1465 return NULL;
1466
1467 Py_RETURN_NONE;
1468}
1469
1470static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001471element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001472{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001473 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001474 int i;
1475 PyObject* old;
1476
1477 if (!self->extra || index < 0 || index >= self->extra->length) {
1478 PyErr_SetString(
1479 PyExc_IndexError,
1480 "child assignment index out of range");
1481 return -1;
1482 }
1483
1484 old = self->extra->children[index];
1485
1486 if (item) {
1487 Py_INCREF(item);
1488 self->extra->children[index] = item;
1489 } else {
1490 self->extra->length--;
1491 for (i = index; i < self->extra->length; i++)
1492 self->extra->children[i] = self->extra->children[i+1];
1493 }
1494
1495 Py_DECREF(old);
1496
1497 return 0;
1498}
1499
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001500static PyObject*
1501element_subscr(PyObject* self_, PyObject* item)
1502{
1503 ElementObject* self = (ElementObject*) self_;
1504
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001505 if (PyIndex_Check(item)) {
1506 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001507
1508 if (i == -1 && PyErr_Occurred()) {
1509 return NULL;
1510 }
1511 if (i < 0 && self->extra)
1512 i += self->extra->length;
1513 return element_getitem(self_, i);
1514 }
1515 else if (PySlice_Check(item)) {
1516 Py_ssize_t start, stop, step, slicelen, cur, i;
1517 PyObject* list;
1518
1519 if (!self->extra)
1520 return PyList_New(0);
1521
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001522 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001523 self->extra->length,
1524 &start, &stop, &step, &slicelen) < 0) {
1525 return NULL;
1526 }
1527
1528 if (slicelen <= 0)
1529 return PyList_New(0);
1530 else {
1531 list = PyList_New(slicelen);
1532 if (!list)
1533 return NULL;
1534
1535 for (cur = start, i = 0; i < slicelen;
1536 cur += step, i++) {
1537 PyObject* item = self->extra->children[cur];
1538 Py_INCREF(item);
1539 PyList_SET_ITEM(list, i, item);
1540 }
1541
1542 return list;
1543 }
1544 }
1545 else {
1546 PyErr_SetString(PyExc_TypeError,
1547 "element indices must be integers");
1548 return NULL;
1549 }
1550}
1551
1552static int
1553element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1554{
1555 ElementObject* self = (ElementObject*) self_;
1556
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001557 if (PyIndex_Check(item)) {
1558 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001559
1560 if (i == -1 && PyErr_Occurred()) {
1561 return -1;
1562 }
1563 if (i < 0 && self->extra)
1564 i += self->extra->length;
1565 return element_setitem(self_, i, value);
1566 }
1567 else if (PySlice_Check(item)) {
1568 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1569
1570 PyObject* recycle = NULL;
1571 PyObject* seq = NULL;
1572
1573 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001574 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001575
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001576 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001577 self->extra->length,
1578 &start, &stop, &step, &slicelen) < 0) {
1579 return -1;
1580 }
1581
Eli Bendersky865756a2012-03-09 13:38:15 +02001582 if (value == NULL) {
1583 /* Delete slice */
1584 size_t cur;
1585 Py_ssize_t i;
1586
1587 if (slicelen <= 0)
1588 return 0;
1589
1590 /* Since we're deleting, the direction of the range doesn't matter,
1591 * so for simplicity make it always ascending.
1592 */
1593 if (step < 0) {
1594 stop = start + 1;
1595 start = stop + step * (slicelen - 1) - 1;
1596 step = -step;
1597 }
1598
1599 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1600
1601 /* recycle is a list that will contain all the children
1602 * scheduled for removal.
1603 */
1604 if (!(recycle = PyList_New(slicelen))) {
1605 PyErr_NoMemory();
1606 return -1;
1607 }
1608
1609 /* This loop walks over all the children that have to be deleted,
1610 * with cur pointing at them. num_moved is the amount of children
1611 * until the next deleted child that have to be "shifted down" to
1612 * occupy the deleted's places.
1613 * Note that in the ith iteration, shifting is done i+i places down
1614 * because i children were already removed.
1615 */
1616 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1617 /* Compute how many children have to be moved, clipping at the
1618 * list end.
1619 */
1620 Py_ssize_t num_moved = step - 1;
1621 if (cur + step >= (size_t)self->extra->length) {
1622 num_moved = self->extra->length - cur - 1;
1623 }
1624
1625 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1626
1627 memmove(
1628 self->extra->children + cur - i,
1629 self->extra->children + cur + 1,
1630 num_moved * sizeof(PyObject *));
1631 }
1632
1633 /* Leftover "tail" after the last removed child */
1634 cur = start + (size_t)slicelen * step;
1635 if (cur < (size_t)self->extra->length) {
1636 memmove(
1637 self->extra->children + cur - slicelen,
1638 self->extra->children + cur,
1639 (self->extra->length - cur) * sizeof(PyObject *));
1640 }
1641
1642 self->extra->length -= slicelen;
1643
1644 /* Discard the recycle list with all the deleted sub-elements */
1645 Py_XDECREF(recycle);
1646 return 0;
1647 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001648 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001649 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001650 seq = PySequence_Fast(value, "");
1651 if (!seq) {
1652 PyErr_Format(
1653 PyExc_TypeError,
1654 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1655 );
1656 return -1;
1657 }
1658 newlen = PySequence_Size(seq);
1659 }
1660
1661 if (step != 1 && newlen != slicelen)
1662 {
1663 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001664 "attempt to assign sequence of size %zd "
1665 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001666 newlen, slicelen
1667 );
1668 return -1;
1669 }
1670
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001671 /* Resize before creating the recycle bin, to prevent refleaks. */
1672 if (newlen > slicelen) {
1673 if (element_resize(self, newlen - slicelen) < 0) {
1674 if (seq) {
1675 Py_DECREF(seq);
1676 }
1677 return -1;
1678 }
1679 }
1680
1681 if (slicelen > 0) {
1682 /* to avoid recursive calls to this method (via decref), move
1683 old items to the recycle bin here, and get rid of them when
1684 we're done modifying the element */
1685 recycle = PyList_New(slicelen);
1686 if (!recycle) {
1687 if (seq) {
1688 Py_DECREF(seq);
1689 }
1690 return -1;
1691 }
1692 for (cur = start, i = 0; i < slicelen;
1693 cur += step, i++)
1694 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1695 }
1696
1697 if (newlen < slicelen) {
1698 /* delete slice */
1699 for (i = stop; i < self->extra->length; i++)
1700 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1701 } else if (newlen > slicelen) {
1702 /* insert slice */
1703 for (i = self->extra->length-1; i >= stop; i--)
1704 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1705 }
1706
1707 /* replace the slice */
1708 for (cur = start, i = 0; i < newlen;
1709 cur += step, i++) {
1710 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1711 Py_INCREF(element);
1712 self->extra->children[cur] = element;
1713 }
1714
1715 self->extra->length += newlen - slicelen;
1716
1717 if (seq) {
1718 Py_DECREF(seq);
1719 }
1720
1721 /* discard the recycle bin, and everything in it */
1722 Py_XDECREF(recycle);
1723
1724 return 0;
1725 }
1726 else {
1727 PyErr_SetString(PyExc_TypeError,
1728 "element indices must be integers");
1729 return -1;
1730 }
1731}
1732
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001733static PyMethodDef element_methods[] = {
1734
Eli Bendersky0192ba32012-03-30 16:38:33 +03001735 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001736
Eli Benderskya8736902013-01-05 06:26:39 -08001737 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001738 {"set", (PyCFunction) element_set, METH_VARARGS},
1739
Eli Bendersky737b1732012-05-29 06:02:56 +03001740 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1741 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1742 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001743
1744 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001745 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001746 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1747 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1748
Eli Benderskya8736902013-01-05 06:26:39 -08001749 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001750 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001751 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001752
Eli Benderskya8736902013-01-05 06:26:39 -08001753 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001754 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1755
1756 {"items", (PyCFunction) element_items, METH_VARARGS},
1757 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1758
1759 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1760
1761 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1762 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001763 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001764 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1765 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001766
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001767 {NULL, NULL}
1768};
1769
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001770static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001771element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001772{
1773 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001774 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001775
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001776 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001777 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001778
Alexander Belopolskye239d232010-12-08 23:31:48 +00001779 if (name == NULL)
1780 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001781
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001782 /* handle common attributes first */
1783 if (strcmp(name, "tag") == 0) {
1784 res = self->tag;
1785 Py_INCREF(res);
1786 return res;
1787 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001788 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001789 Py_INCREF(res);
1790 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001791 }
1792
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001793 /* methods */
1794 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1795 if (res)
1796 return res;
1797
1798 /* less common attributes */
1799 if (strcmp(name, "tail") == 0) {
1800 PyErr_Clear();
1801 res = element_get_tail(self);
1802 } else if (strcmp(name, "attrib") == 0) {
1803 PyErr_Clear();
1804 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001805 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001806 res = element_get_attrib(self);
1807 }
1808
1809 if (!res)
1810 return NULL;
1811
1812 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001813 return res;
1814}
1815
Eli Benderskyef9683b2013-05-18 07:52:34 -07001816static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001817element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001818{
Eli Benderskyb20df952012-05-20 06:33:29 +03001819 char *name = "";
1820 if (PyUnicode_Check(nameobj))
1821 name = _PyUnicode_AsString(nameobj);
1822
Eli Benderskyef9683b2013-05-18 07:52:34 -07001823 if (name == NULL) {
1824 return -1;
1825 } else if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001826 Py_DECREF(self->tag);
1827 self->tag = value;
1828 Py_INCREF(self->tag);
1829 } else if (strcmp(name, "text") == 0) {
1830 Py_DECREF(JOIN_OBJ(self->text));
1831 self->text = value;
1832 Py_INCREF(self->text);
1833 } else if (strcmp(name, "tail") == 0) {
1834 Py_DECREF(JOIN_OBJ(self->tail));
1835 self->tail = value;
1836 Py_INCREF(self->tail);
1837 } else if (strcmp(name, "attrib") == 0) {
1838 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001839 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001840 Py_DECREF(self->extra->attrib);
1841 self->extra->attrib = value;
1842 Py_INCREF(self->extra->attrib);
1843 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001844 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001845 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001846 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001847 }
1848
Eli Benderskyef9683b2013-05-18 07:52:34 -07001849 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001850}
1851
1852static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001853 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001854 0, /* sq_concat */
1855 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001856 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001857 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001858 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001859 0,
1860};
1861
1862static PyMappingMethods element_as_mapping = {
1863 (lenfunc) element_length,
1864 (binaryfunc) element_subscr,
1865 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001866};
1867
Neal Norwitz227b5332006-03-22 09:28:35 +00001868static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001869 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001870 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001871 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001872 (destructor)element_dealloc, /* tp_dealloc */
1873 0, /* tp_print */
1874 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001875 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001876 0, /* tp_reserved */
1877 (reprfunc)element_repr, /* tp_repr */
1878 0, /* tp_as_number */
1879 &element_as_sequence, /* tp_as_sequence */
1880 &element_as_mapping, /* tp_as_mapping */
1881 0, /* tp_hash */
1882 0, /* tp_call */
1883 0, /* tp_str */
1884 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001885 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001886 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001887 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1888 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001889 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001890 (traverseproc)element_gc_traverse, /* tp_traverse */
1891 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001892 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001893 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001894 0, /* tp_iter */
1895 0, /* tp_iternext */
1896 element_methods, /* tp_methods */
1897 0, /* tp_members */
1898 0, /* tp_getset */
1899 0, /* tp_base */
1900 0, /* tp_dict */
1901 0, /* tp_descr_get */
1902 0, /* tp_descr_set */
1903 0, /* tp_dictoffset */
1904 (initproc)element_init, /* tp_init */
1905 PyType_GenericAlloc, /* tp_alloc */
1906 element_new, /* tp_new */
1907 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001908};
1909
Eli Bendersky64d11e62012-06-15 07:42:50 +03001910/******************************* Element iterator ****************************/
1911
1912/* ElementIterObject represents the iteration state over an XML element in
1913 * pre-order traversal. To keep track of which sub-element should be returned
1914 * next, a stack of parents is maintained. This is a standard stack-based
1915 * iterative pre-order traversal of a tree.
1916 * The stack is managed using a single-linked list starting at parent_stack.
1917 * Each stack node contains the saved parent to which we should return after
1918 * the current one is exhausted, and the next child to examine in that parent.
1919 */
1920typedef struct ParentLocator_t {
1921 ElementObject *parent;
1922 Py_ssize_t child_index;
1923 struct ParentLocator_t *next;
1924} ParentLocator;
1925
1926typedef struct {
1927 PyObject_HEAD
1928 ParentLocator *parent_stack;
1929 ElementObject *root_element;
1930 PyObject *sought_tag;
1931 int root_done;
1932 int gettext;
1933} ElementIterObject;
1934
1935
1936static void
1937elementiter_dealloc(ElementIterObject *it)
1938{
1939 ParentLocator *p = it->parent_stack;
1940 while (p) {
1941 ParentLocator *temp = p;
1942 Py_XDECREF(p->parent);
1943 p = p->next;
1944 PyObject_Free(temp);
1945 }
1946
1947 Py_XDECREF(it->sought_tag);
1948 Py_XDECREF(it->root_element);
1949
1950 PyObject_GC_UnTrack(it);
1951 PyObject_GC_Del(it);
1952}
1953
1954static int
1955elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1956{
1957 ParentLocator *p = it->parent_stack;
1958 while (p) {
1959 Py_VISIT(p->parent);
1960 p = p->next;
1961 }
1962
1963 Py_VISIT(it->root_element);
1964 Py_VISIT(it->sought_tag);
1965 return 0;
1966}
1967
1968/* Helper function for elementiter_next. Add a new parent to the parent stack.
1969 */
1970static ParentLocator *
1971parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1972{
1973 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1974 if (new_node) {
1975 new_node->parent = parent;
1976 Py_INCREF(parent);
1977 new_node->child_index = 0;
1978 new_node->next = stack;
1979 }
1980 return new_node;
1981}
1982
1983static PyObject *
1984elementiter_next(ElementIterObject *it)
1985{
1986 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08001987 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03001988 * A short note on gettext: this function serves both the iter() and
1989 * itertext() methods to avoid code duplication. However, there are a few
1990 * small differences in the way these iterations work. Namely:
1991 * - itertext() only yields text from nodes that have it, and continues
1992 * iterating when a node doesn't have text (so it doesn't return any
1993 * node like iter())
1994 * - itertext() also has to handle tail, after finishing with all the
1995 * children of a node.
1996 */
Eli Bendersky113da642012-06-15 07:52:49 +03001997 ElementObject *cur_parent;
1998 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001999
2000 while (1) {
2001 /* Handle the case reached in the beginning and end of iteration, where
2002 * the parent stack is empty. The root_done flag gives us indication
2003 * whether we've just started iterating (so root_done is 0), in which
2004 * case the root is returned. If root_done is 1 and we're here, the
2005 * iterator is exhausted.
2006 */
2007 if (!it->parent_stack->parent) {
2008 if (it->root_done) {
2009 PyErr_SetNone(PyExc_StopIteration);
2010 return NULL;
2011 } else {
2012 it->parent_stack = parent_stack_push_new(it->parent_stack,
2013 it->root_element);
2014 if (!it->parent_stack) {
2015 PyErr_NoMemory();
2016 return NULL;
2017 }
2018
2019 it->root_done = 1;
2020 if (it->sought_tag == Py_None ||
2021 PyObject_RichCompareBool(it->root_element->tag,
2022 it->sought_tag, Py_EQ) == 1) {
2023 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002024 PyObject *text = element_get_text(it->root_element);
2025 if (!text)
2026 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002027 if (PyObject_IsTrue(text)) {
2028 Py_INCREF(text);
2029 return text;
2030 }
2031 } else {
2032 Py_INCREF(it->root_element);
2033 return (PyObject *)it->root_element;
2034 }
2035 }
2036 }
2037 }
2038
2039 /* See if there are children left to traverse in the current parent. If
2040 * yes, visit the next child. If not, pop the stack and try again.
2041 */
Eli Bendersky113da642012-06-15 07:52:49 +03002042 cur_parent = it->parent_stack->parent;
2043 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002044 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2045 ElementObject *child = (ElementObject *)
2046 cur_parent->extra->children[child_index];
2047 it->parent_stack->child_index++;
2048 it->parent_stack = parent_stack_push_new(it->parent_stack,
2049 child);
2050 if (!it->parent_stack) {
2051 PyErr_NoMemory();
2052 return NULL;
2053 }
2054
2055 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002056 PyObject *text = element_get_text(child);
2057 if (!text)
2058 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002059 if (PyObject_IsTrue(text)) {
2060 Py_INCREF(text);
2061 return text;
2062 }
2063 } else if (it->sought_tag == Py_None ||
2064 PyObject_RichCompareBool(child->tag,
2065 it->sought_tag, Py_EQ) == 1) {
2066 Py_INCREF(child);
2067 return (PyObject *)child;
2068 }
2069 else
2070 continue;
2071 }
2072 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002073 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002074 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002075 if (it->gettext) {
2076 tail = element_get_tail(cur_parent);
2077 if (!tail)
2078 return NULL;
2079 }
2080 else
2081 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002082 Py_XDECREF(it->parent_stack->parent);
2083 PyObject_Free(it->parent_stack);
2084 it->parent_stack = next;
2085
2086 /* Note that extra condition on it->parent_stack->parent here;
2087 * this is because itertext() is supposed to only return *inner*
2088 * text, not text following the element it began iteration with.
2089 */
2090 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2091 Py_INCREF(tail);
2092 return tail;
2093 }
2094 }
2095 }
2096
2097 return NULL;
2098}
2099
2100
2101static PyTypeObject ElementIter_Type = {
2102 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002103 /* Using the module's name since the pure-Python implementation does not
2104 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002105 "_elementtree._element_iterator", /* tp_name */
2106 sizeof(ElementIterObject), /* tp_basicsize */
2107 0, /* tp_itemsize */
2108 /* methods */
2109 (destructor)elementiter_dealloc, /* tp_dealloc */
2110 0, /* tp_print */
2111 0, /* tp_getattr */
2112 0, /* tp_setattr */
2113 0, /* tp_reserved */
2114 0, /* tp_repr */
2115 0, /* tp_as_number */
2116 0, /* tp_as_sequence */
2117 0, /* tp_as_mapping */
2118 0, /* tp_hash */
2119 0, /* tp_call */
2120 0, /* tp_str */
2121 0, /* tp_getattro */
2122 0, /* tp_setattro */
2123 0, /* tp_as_buffer */
2124 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2125 0, /* tp_doc */
2126 (traverseproc)elementiter_traverse, /* tp_traverse */
2127 0, /* tp_clear */
2128 0, /* tp_richcompare */
2129 0, /* tp_weaklistoffset */
2130 PyObject_SelfIter, /* tp_iter */
2131 (iternextfunc)elementiter_next, /* tp_iternext */
2132 0, /* tp_methods */
2133 0, /* tp_members */
2134 0, /* tp_getset */
2135 0, /* tp_base */
2136 0, /* tp_dict */
2137 0, /* tp_descr_get */
2138 0, /* tp_descr_set */
2139 0, /* tp_dictoffset */
2140 0, /* tp_init */
2141 0, /* tp_alloc */
2142 0, /* tp_new */
2143};
2144
2145
2146static PyObject *
2147create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2148{
2149 ElementIterObject *it;
2150 PyObject *star = NULL;
2151
2152 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2153 if (!it)
2154 return NULL;
2155 if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
2156 PyObject_GC_Del(it);
2157 return NULL;
2158 }
2159
2160 it->parent_stack->parent = NULL;
2161 it->parent_stack->child_index = 0;
2162 it->parent_stack->next = NULL;
2163
2164 if (PyUnicode_Check(tag))
2165 star = PyUnicode_FromString("*");
2166 else if (PyBytes_Check(tag))
2167 star = PyBytes_FromString("*");
2168
2169 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2170 tag = Py_None;
2171
2172 Py_XDECREF(star);
2173 it->sought_tag = tag;
2174 it->root_done = 0;
2175 it->gettext = gettext;
2176 it->root_element = self;
2177
2178 Py_INCREF(self);
2179 Py_INCREF(tag);
2180
2181 PyObject_GC_Track(it);
2182 return (PyObject *)it;
2183}
2184
2185
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002186/* ==================================================================== */
2187/* the tree builder type */
2188
2189typedef struct {
2190 PyObject_HEAD
2191
Eli Bendersky58d548d2012-05-29 15:45:16 +03002192 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002193
Antoine Pitrouee329312012-10-04 19:53:29 +02002194 PyObject *this; /* current node */
2195 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002196
Eli Bendersky58d548d2012-05-29 15:45:16 +03002197 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002198
Eli Bendersky58d548d2012-05-29 15:45:16 +03002199 PyObject *stack; /* element stack */
2200 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002201
Eli Bendersky48d358b2012-05-30 17:57:50 +03002202 PyObject *element_factory;
2203
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002204 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002205 PyObject *events; /* list of events, or NULL if not collecting */
2206 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2207 PyObject *end_event_obj;
2208 PyObject *start_ns_event_obj;
2209 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002210} TreeBuilderObject;
2211
Christian Heimes90aa7642007-12-19 02:45:37 +00002212#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002213
2214/* -------------------------------------------------------------------- */
2215/* constructor and destructor */
2216
Eli Bendersky58d548d2012-05-29 15:45:16 +03002217static PyObject *
2218treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002219{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002220 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2221 if (t != NULL) {
2222 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002223
Eli Bendersky58d548d2012-05-29 15:45:16 +03002224 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002225 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002226 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002227 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002228
Eli Bendersky58d548d2012-05-29 15:45:16 +03002229 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002230 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002231 t->stack = PyList_New(20);
2232 if (!t->stack) {
2233 Py_DECREF(t->this);
2234 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002235 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002236 return NULL;
2237 }
2238 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002239
Eli Bendersky58d548d2012-05-29 15:45:16 +03002240 t->events = NULL;
2241 t->start_event_obj = t->end_event_obj = NULL;
2242 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2243 }
2244 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002245}
2246
Eli Bendersky58d548d2012-05-29 15:45:16 +03002247static int
2248treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002249{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002250 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002251 PyObject *element_factory = NULL;
2252 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002253 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002254
2255 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2256 &element_factory)) {
2257 return -1;
2258 }
2259
2260 if (element_factory) {
2261 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002262 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002263 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002264 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002265 }
2266
Eli Bendersky58d548d2012-05-29 15:45:16 +03002267 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002268}
2269
Eli Bendersky48d358b2012-05-30 17:57:50 +03002270static int
2271treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2272{
2273 Py_VISIT(self->root);
2274 Py_VISIT(self->this);
2275 Py_VISIT(self->last);
2276 Py_VISIT(self->data);
2277 Py_VISIT(self->stack);
2278 Py_VISIT(self->element_factory);
2279 return 0;
2280}
2281
2282static int
2283treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002284{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002285 Py_CLEAR(self->end_ns_event_obj);
2286 Py_CLEAR(self->start_ns_event_obj);
2287 Py_CLEAR(self->end_event_obj);
2288 Py_CLEAR(self->start_event_obj);
2289 Py_CLEAR(self->events);
2290 Py_CLEAR(self->stack);
2291 Py_CLEAR(self->data);
2292 Py_CLEAR(self->last);
2293 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002294 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002295 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002296 return 0;
2297}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002298
Eli Bendersky48d358b2012-05-30 17:57:50 +03002299static void
2300treebuilder_dealloc(TreeBuilderObject *self)
2301{
2302 PyObject_GC_UnTrack(self);
2303 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002304 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002305}
2306
2307/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002308/* helpers for handling of arbitrary element-like objects */
2309
2310static int
2311treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2312 PyObject **dest, _Py_Identifier *name)
2313{
2314 if (Element_CheckExact(element)) {
2315 Py_DECREF(JOIN_OBJ(*dest));
2316 *dest = JOIN_SET(data, PyList_CheckExact(data));
2317 return 0;
2318 }
2319 else {
2320 PyObject *joined = list_join(data);
2321 int r;
2322 if (joined == NULL)
2323 return -1;
2324 r = _PyObject_SetAttrId(element, name, joined);
2325 Py_DECREF(joined);
2326 return r;
2327 }
2328}
2329
2330/* These two functions steal a reference to data */
2331static int
2332treebuilder_set_element_text(PyObject *element, PyObject *data)
2333{
2334 _Py_IDENTIFIER(text);
2335 return treebuilder_set_element_text_or_tail(
2336 element, data, &((ElementObject *) element)->text, &PyId_text);
2337}
2338
2339static int
2340treebuilder_set_element_tail(PyObject *element, PyObject *data)
2341{
2342 _Py_IDENTIFIER(tail);
2343 return treebuilder_set_element_text_or_tail(
2344 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2345}
2346
2347static int
2348treebuilder_add_subelement(PyObject *element, PyObject *child)
2349{
2350 _Py_IDENTIFIER(append);
2351 if (Element_CheckExact(element)) {
2352 ElementObject *elem = (ElementObject *) element;
2353 return element_add_subelement(elem, child);
2354 }
2355 else {
2356 PyObject *res;
2357 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2358 if (res == NULL)
2359 return -1;
2360 Py_DECREF(res);
2361 return 0;
2362 }
2363}
2364
2365/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002366/* handlers */
2367
2368LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002369treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2370 PyObject* attrib)
2371{
2372 PyObject* node;
2373 PyObject* this;
2374
2375 if (self->data) {
2376 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002377 if (treebuilder_set_element_text(self->last, self->data))
2378 return NULL;
2379 }
2380 else {
2381 if (treebuilder_set_element_tail(self->last, self->data))
2382 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002383 }
2384 self->data = NULL;
2385 }
2386
Eli Bendersky08231a92013-05-18 15:47:16 -07002387 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002388 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2389 } else {
2390 node = create_new_element(tag, attrib);
2391 }
2392 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002393 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002394 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002395
Antoine Pitrouee329312012-10-04 19:53:29 +02002396 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002397
2398 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002399 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002400 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002401 } else {
2402 if (self->root) {
2403 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002404 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002405 "multiple elements on top level"
2406 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002407 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002408 }
2409 Py_INCREF(node);
2410 self->root = node;
2411 }
2412
2413 if (self->index < PyList_GET_SIZE(self->stack)) {
2414 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002415 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002416 Py_INCREF(this);
2417 } else {
2418 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002419 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002420 }
2421 self->index++;
2422
2423 Py_DECREF(this);
2424 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002425 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002426
2427 Py_DECREF(self->last);
2428 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002429 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002430
2431 if (self->start_event_obj) {
2432 PyObject* res;
2433 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002434 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002435 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002436 PyList_Append(self->events, res);
2437 Py_DECREF(res);
2438 } else
2439 PyErr_Clear(); /* FIXME: propagate error */
2440 }
2441
2442 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002443
2444 error:
2445 Py_DECREF(node);
2446 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002447}
2448
2449LOCAL(PyObject*)
2450treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2451{
2452 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002453 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002454 /* ignore calls to data before the first call to start */
2455 Py_RETURN_NONE;
2456 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002457 /* store the first item as is */
2458 Py_INCREF(data); self->data = data;
2459 } else {
2460 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002461 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2462 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002463 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002464 /* expat often generates single character data sections; handle
2465 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002466 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2467 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002468 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002469 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002470 } else if (PyList_CheckExact(self->data)) {
2471 if (PyList_Append(self->data, data) < 0)
2472 return NULL;
2473 } else {
2474 PyObject* list = PyList_New(2);
2475 if (!list)
2476 return NULL;
2477 PyList_SET_ITEM(list, 0, self->data);
2478 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2479 self->data = list;
2480 }
2481 }
2482
2483 Py_RETURN_NONE;
2484}
2485
2486LOCAL(PyObject*)
2487treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2488{
2489 PyObject* item;
2490
2491 if (self->data) {
2492 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002493 if (treebuilder_set_element_text(self->last, self->data))
2494 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002495 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002496 if (treebuilder_set_element_tail(self->last, self->data))
2497 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002498 }
2499 self->data = NULL;
2500 }
2501
2502 if (self->index == 0) {
2503 PyErr_SetString(
2504 PyExc_IndexError,
2505 "pop from empty stack"
2506 );
2507 return NULL;
2508 }
2509
2510 self->index--;
2511
2512 item = PyList_GET_ITEM(self->stack, self->index);
2513 Py_INCREF(item);
2514
2515 Py_DECREF(self->last);
2516
Antoine Pitrouee329312012-10-04 19:53:29 +02002517 self->last = self->this;
2518 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519
2520 if (self->end_event_obj) {
2521 PyObject* res;
2522 PyObject* action = self->end_event_obj;
2523 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002524 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002525 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002526 PyList_Append(self->events, res);
2527 Py_DECREF(res);
2528 } else
2529 PyErr_Clear(); /* FIXME: propagate error */
2530 }
2531
2532 Py_INCREF(self->last);
2533 return (PyObject*) self->last;
2534}
2535
2536LOCAL(void)
2537treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002538 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002539{
2540 PyObject* res;
2541 PyObject* action;
2542 PyObject* parcel;
2543
2544 if (!self->events)
2545 return;
2546
2547 if (start) {
2548 if (!self->start_ns_event_obj)
2549 return;
2550 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002551 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002552 if (!parcel)
2553 return;
2554 Py_INCREF(action);
2555 } else {
2556 if (!self->end_ns_event_obj)
2557 return;
2558 action = self->end_ns_event_obj;
2559 Py_INCREF(action);
2560 parcel = Py_None;
2561 Py_INCREF(parcel);
2562 }
2563
2564 res = PyTuple_New(2);
2565
2566 if (res) {
2567 PyTuple_SET_ITEM(res, 0, action);
2568 PyTuple_SET_ITEM(res, 1, parcel);
2569 PyList_Append(self->events, res);
2570 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002571 }
2572 else {
2573 Py_DECREF(action);
2574 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002575 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002576 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002577}
2578
2579/* -------------------------------------------------------------------- */
2580/* methods (in alphabetical order) */
2581
2582static PyObject*
2583treebuilder_data(TreeBuilderObject* self, PyObject* args)
2584{
2585 PyObject* data;
2586 if (!PyArg_ParseTuple(args, "O:data", &data))
2587 return NULL;
2588
2589 return treebuilder_handle_data(self, data);
2590}
2591
2592static PyObject*
2593treebuilder_end(TreeBuilderObject* self, PyObject* args)
2594{
2595 PyObject* tag;
2596 if (!PyArg_ParseTuple(args, "O:end", &tag))
2597 return NULL;
2598
2599 return treebuilder_handle_end(self, tag);
2600}
2601
2602LOCAL(PyObject*)
2603treebuilder_done(TreeBuilderObject* self)
2604{
2605 PyObject* res;
2606
2607 /* FIXME: check stack size? */
2608
2609 if (self->root)
2610 res = self->root;
2611 else
2612 res = Py_None;
2613
2614 Py_INCREF(res);
2615 return res;
2616}
2617
2618static PyObject*
2619treebuilder_close(TreeBuilderObject* self, PyObject* args)
2620{
2621 if (!PyArg_ParseTuple(args, ":close"))
2622 return NULL;
2623
2624 return treebuilder_done(self);
2625}
2626
2627static PyObject*
2628treebuilder_start(TreeBuilderObject* self, PyObject* args)
2629{
2630 PyObject* tag;
2631 PyObject* attrib = Py_None;
2632 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2633 return NULL;
2634
2635 return treebuilder_handle_start(self, tag, attrib);
2636}
2637
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002638static PyMethodDef treebuilder_methods[] = {
2639 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2640 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2641 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002642 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2643 {NULL, NULL}
2644};
2645
Neal Norwitz227b5332006-03-22 09:28:35 +00002646static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002647 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002648 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002649 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002650 (destructor)treebuilder_dealloc, /* tp_dealloc */
2651 0, /* tp_print */
2652 0, /* tp_getattr */
2653 0, /* tp_setattr */
2654 0, /* tp_reserved */
2655 0, /* tp_repr */
2656 0, /* tp_as_number */
2657 0, /* tp_as_sequence */
2658 0, /* tp_as_mapping */
2659 0, /* tp_hash */
2660 0, /* tp_call */
2661 0, /* tp_str */
2662 0, /* tp_getattro */
2663 0, /* tp_setattro */
2664 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002665 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2666 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002667 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002668 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2669 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002670 0, /* tp_richcompare */
2671 0, /* tp_weaklistoffset */
2672 0, /* tp_iter */
2673 0, /* tp_iternext */
2674 treebuilder_methods, /* tp_methods */
2675 0, /* tp_members */
2676 0, /* tp_getset */
2677 0, /* tp_base */
2678 0, /* tp_dict */
2679 0, /* tp_descr_get */
2680 0, /* tp_descr_set */
2681 0, /* tp_dictoffset */
2682 (initproc)treebuilder_init, /* tp_init */
2683 PyType_GenericAlloc, /* tp_alloc */
2684 treebuilder_new, /* tp_new */
2685 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002686};
2687
2688/* ==================================================================== */
2689/* the expat interface */
2690
2691#if defined(USE_EXPAT)
2692
2693#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002694#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002695static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002696#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002697
Eli Bendersky52467b12012-06-01 07:13:08 +03002698static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2699 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2700
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002701typedef struct {
2702 PyObject_HEAD
2703
2704 XML_Parser parser;
2705
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002706 PyObject *target;
2707 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002708
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002709 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002711 PyObject *handle_start;
2712 PyObject *handle_data;
2713 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002714
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002715 PyObject *handle_comment;
2716 PyObject *handle_pi;
2717 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002719 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002720
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002721} XMLParserObject;
2722
/* True only when op's type is exactly XMLParser_Type (subclasses fail). */
#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2724
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002725/* helpers */
2726
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002727LOCAL(PyObject*)
2728makeuniversal(XMLParserObject* self, const char* string)
2729{
2730 /* convert a UTF-8 tag/attribute name from the expat parser
2731 to a universal name string */
2732
Antoine Pitrouc1948842012-10-01 23:40:37 +02002733 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734 PyObject* key;
2735 PyObject* value;
2736
2737 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002738 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739 if (!key)
2740 return NULL;
2741
2742 value = PyDict_GetItem(self->names, key);
2743
2744 if (value) {
2745 Py_INCREF(value);
2746 } else {
2747 /* new name. convert to universal name, and decode as
2748 necessary */
2749
2750 PyObject* tag;
2751 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002752 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753
2754 /* look for namespace separator */
2755 for (i = 0; i < size; i++)
2756 if (string[i] == '}')
2757 break;
2758 if (i != size) {
2759 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002760 tag = PyBytes_FromStringAndSize(NULL, size+1);
2761 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002762 p[0] = '{';
2763 memcpy(p+1, string, size);
2764 size++;
2765 } else {
2766 /* plain name; use key as tag */
2767 Py_INCREF(key);
2768 tag = key;
2769 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002771 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002772 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002773 value = PyUnicode_DecodeUTF8(p, size, "strict");
2774 Py_DECREF(tag);
2775 if (!value) {
2776 Py_DECREF(key);
2777 return NULL;
2778 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002779
2780 /* add to names dictionary */
2781 if (PyDict_SetItem(self->names, key, value) < 0) {
2782 Py_DECREF(key);
2783 Py_DECREF(value);
2784 return NULL;
2785 }
2786 }
2787
2788 Py_DECREF(key);
2789 return value;
2790}
2791
Eli Bendersky5b77d812012-03-16 08:20:05 +02002792/* Set the ParseError exception with the given parameters.
2793 * If message is not NULL, it's used as the error string. Otherwise, the
2794 * message string is the default for the given error_code.
2795*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002796static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002797expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002798{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002799 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002800
Victor Stinner499dfcf2011-03-21 13:26:24 +01002801 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002802 message ? message : EXPAT(ErrorString)(error_code),
2803 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002804 if (errmsg == NULL)
2805 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002806
Victor Stinner499dfcf2011-03-21 13:26:24 +01002807 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2808 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002809 if (!error)
2810 return;
2811
Eli Bendersky5b77d812012-03-16 08:20:05 +02002812 /* Add code and position attributes */
2813 code = PyLong_FromLong((long)error_code);
2814 if (!code) {
2815 Py_DECREF(error);
2816 return;
2817 }
2818 if (PyObject_SetAttrString(error, "code", code) == -1) {
2819 Py_DECREF(error);
2820 Py_DECREF(code);
2821 return;
2822 }
2823 Py_DECREF(code);
2824
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002825 position = Py_BuildValue("(ii)", line, column);
2826 if (!position) {
2827 Py_DECREF(error);
2828 return;
2829 }
2830 if (PyObject_SetAttrString(error, "position", position) == -1) {
2831 Py_DECREF(error);
2832 Py_DECREF(position);
2833 return;
2834 }
2835 Py_DECREF(position);
2836
2837 PyErr_SetObject(elementtree_parseerror_obj, error);
2838 Py_DECREF(error);
2839}
2840
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002841/* -------------------------------------------------------------------- */
2842/* handlers */
2843
2844static void
2845expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2846 int data_len)
2847{
2848 PyObject* key;
2849 PyObject* value;
2850 PyObject* res;
2851
2852 if (data_len < 2 || data_in[0] != '&')
2853 return;
2854
Neal Norwitz0269b912007-08-08 06:56:02 +00002855 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002856 if (!key)
2857 return;
2858
2859 value = PyDict_GetItem(self->entity, key);
2860
2861 if (value) {
2862 if (TreeBuilder_CheckExact(self->target))
2863 res = treebuilder_handle_data(
2864 (TreeBuilderObject*) self->target, value
2865 );
2866 else if (self->handle_data)
2867 res = PyObject_CallFunction(self->handle_data, "O", value);
2868 else
2869 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002870 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002871 } else if (!PyErr_Occurred()) {
2872 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002873 char message[128] = "undefined entity ";
2874 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002875 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002876 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002877 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002878 EXPAT(GetErrorColumnNumber)(self->parser),
2879 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002880 );
2881 }
2882
2883 Py_DECREF(key);
2884}
2885
2886static void
2887expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2888 const XML_Char **attrib_in)
2889{
2890 PyObject* res;
2891 PyObject* tag;
2892 PyObject* attrib;
2893 int ok;
2894
2895 /* tag name */
2896 tag = makeuniversal(self, tag_in);
2897 if (!tag)
2898 return; /* parser will look for errors */
2899
2900 /* attributes */
2901 if (attrib_in[0]) {
2902 attrib = PyDict_New();
2903 if (!attrib)
2904 return;
2905 while (attrib_in[0] && attrib_in[1]) {
2906 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002907 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002908 if (!key || !value) {
2909 Py_XDECREF(value);
2910 Py_XDECREF(key);
2911 Py_DECREF(attrib);
2912 return;
2913 }
2914 ok = PyDict_SetItem(attrib, key, value);
2915 Py_DECREF(value);
2916 Py_DECREF(key);
2917 if (ok < 0) {
2918 Py_DECREF(attrib);
2919 return;
2920 }
2921 attrib_in += 2;
2922 }
2923 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002924 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002925 attrib = PyDict_New();
2926 if (!attrib)
2927 return;
2928 }
2929
2930 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002931 /* shortcut */
2932 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2933 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002934 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002935 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002936 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002937 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002938 res = NULL;
2939
2940 Py_DECREF(tag);
2941 Py_DECREF(attrib);
2942
2943 Py_XDECREF(res);
2944}
2945
2946static void
2947expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2948 int data_len)
2949{
2950 PyObject* data;
2951 PyObject* res;
2952
Neal Norwitz0269b912007-08-08 06:56:02 +00002953 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002954 if (!data)
2955 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002956
2957 if (TreeBuilder_CheckExact(self->target))
2958 /* shortcut */
2959 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2960 else if (self->handle_data)
2961 res = PyObject_CallFunction(self->handle_data, "O", data);
2962 else
2963 res = NULL;
2964
2965 Py_DECREF(data);
2966
2967 Py_XDECREF(res);
2968}
2969
2970static void
2971expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2972{
2973 PyObject* tag;
2974 PyObject* res = NULL;
2975
2976 if (TreeBuilder_CheckExact(self->target))
2977 /* shortcut */
2978 /* the standard tree builder doesn't look at the end tag */
2979 res = treebuilder_handle_end(
2980 (TreeBuilderObject*) self->target, Py_None
2981 );
2982 else if (self->handle_end) {
2983 tag = makeuniversal(self, tag_in);
2984 if (tag) {
2985 res = PyObject_CallFunction(self->handle_end, "O", tag);
2986 Py_DECREF(tag);
2987 }
2988 }
2989
2990 Py_XDECREF(res);
2991}
2992
2993static void
2994expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2995 const XML_Char *uri)
2996{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002997 PyObject* sprefix = NULL;
2998 PyObject* suri = NULL;
2999
Eli Bendersky5dd40e52013-11-28 06:31:58 -08003000 if (uri)
3001 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
3002 else
3003 suri = PyUnicode_FromString("");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003004 if (!suri)
3005 return;
3006
3007 if (prefix)
3008 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3009 else
3010 sprefix = PyUnicode_FromString("");
3011 if (!sprefix) {
3012 Py_DECREF(suri);
3013 return;
3014 }
3015
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003017 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003018 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003019
3020 Py_DECREF(sprefix);
3021 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003022}
3023
3024static void
3025expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3026{
3027 treebuilder_handle_namespace(
3028 (TreeBuilderObject*) self->target, 0, NULL, NULL
3029 );
3030}
3031
3032static void
3033expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3034{
3035 PyObject* comment;
3036 PyObject* res;
3037
3038 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003039 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003040 if (comment) {
3041 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3042 Py_XDECREF(res);
3043 Py_DECREF(comment);
3044 }
3045 }
3046}
3047
Eli Bendersky45839902013-01-13 05:14:47 -08003048static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003049expat_start_doctype_handler(XMLParserObject *self,
3050 const XML_Char *doctype_name,
3051 const XML_Char *sysid,
3052 const XML_Char *pubid,
3053 int has_internal_subset)
3054{
3055 PyObject *self_pyobj = (PyObject *)self;
3056 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3057 PyObject *parser_doctype = NULL;
3058 PyObject *res = NULL;
3059
3060 doctype_name_obj = makeuniversal(self, doctype_name);
3061 if (!doctype_name_obj)
3062 return;
3063
3064 if (sysid) {
3065 sysid_obj = makeuniversal(self, sysid);
3066 if (!sysid_obj) {
3067 Py_DECREF(doctype_name_obj);
3068 return;
3069 }
3070 } else {
3071 Py_INCREF(Py_None);
3072 sysid_obj = Py_None;
3073 }
3074
3075 if (pubid) {
3076 pubid_obj = makeuniversal(self, pubid);
3077 if (!pubid_obj) {
3078 Py_DECREF(doctype_name_obj);
3079 Py_DECREF(sysid_obj);
3080 return;
3081 }
3082 } else {
3083 Py_INCREF(Py_None);
3084 pubid_obj = Py_None;
3085 }
3086
3087 /* If the target has a handler for doctype, call it. */
3088 if (self->handle_doctype) {
3089 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3090 doctype_name_obj, pubid_obj, sysid_obj);
3091 Py_CLEAR(res);
3092 }
3093
3094 /* Now see if the parser itself has a doctype method. If yes and it's
3095 * a subclass, call it but warn about deprecation. If it's not a subclass
3096 * (i.e. vanilla XMLParser), do nothing.
3097 */
3098 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3099 if (parser_doctype) {
3100 if (!XMLParser_CheckExact(self_pyobj)) {
3101 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3102 "This method of XMLParser is deprecated. Define"
3103 " doctype() method on the TreeBuilder target.",
3104 1) < 0) {
3105 goto clear;
3106 }
3107 res = PyObject_CallFunction(parser_doctype, "OOO",
3108 doctype_name_obj, pubid_obj, sysid_obj);
3109 Py_CLEAR(res);
3110 }
3111 }
3112
3113clear:
3114 Py_XDECREF(parser_doctype);
3115 Py_DECREF(doctype_name_obj);
3116 Py_DECREF(pubid_obj);
3117 Py_DECREF(sysid_obj);
3118}
3119
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003120static void
3121expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3122 const XML_Char* data_in)
3123{
3124 PyObject* target;
3125 PyObject* data;
3126 PyObject* res;
3127
3128 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003129 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3130 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003131 if (target && data) {
3132 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3133 Py_XDECREF(res);
3134 Py_DECREF(data);
3135 Py_DECREF(target);
3136 } else {
3137 Py_XDECREF(data);
3138 Py_XDECREF(target);
3139 }
3140 }
3141}
3142
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003143/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003144
Eli Bendersky52467b12012-06-01 07:13:08 +03003145static PyObject *
3146xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003147{
Eli Bendersky52467b12012-06-01 07:13:08 +03003148 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3149 if (self) {
3150 self->parser = NULL;
3151 self->target = self->entity = self->names = NULL;
3152 self->handle_start = self->handle_data = self->handle_end = NULL;
3153 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003154 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003155 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003156 return (PyObject *)self;
3157}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003158
Eli Bendersky52467b12012-06-01 07:13:08 +03003159static int
3160xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3161{
3162 XMLParserObject *self_xp = (XMLParserObject *)self;
3163 PyObject *target = NULL, *html = NULL;
3164 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003165 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003166
Eli Bendersky52467b12012-06-01 07:13:08 +03003167 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3168 &html, &target, &encoding)) {
3169 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003170 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003171
Eli Bendersky52467b12012-06-01 07:13:08 +03003172 self_xp->entity = PyDict_New();
3173 if (!self_xp->entity)
3174 return -1;
3175
3176 self_xp->names = PyDict_New();
3177 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003178 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003179 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003180 }
3181
Eli Bendersky52467b12012-06-01 07:13:08 +03003182 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3183 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003184 Py_CLEAR(self_xp->entity);
3185 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003186 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003187 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003188 }
3189
Eli Bendersky52467b12012-06-01 07:13:08 +03003190 if (target) {
3191 Py_INCREF(target);
3192 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003193 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003194 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003195 Py_CLEAR(self_xp->entity);
3196 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003197 EXPAT(ParserFree)(self_xp->parser);
3198 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003199 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003200 }
3201 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003202
Eli Bendersky52467b12012-06-01 07:13:08 +03003203 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3204 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3205 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3206 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3207 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3208 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003209 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003210
3211 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003212
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003213 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003214 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003215 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003216 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003217 (XML_StartElementHandler) expat_start_handler,
3218 (XML_EndElementHandler) expat_end_handler
3219 );
3220 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003221 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003222 (XML_DefaultHandler) expat_default_handler
3223 );
3224 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003225 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003226 (XML_CharacterDataHandler) expat_data_handler
3227 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003228 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003229 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003230 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003231 (XML_CommentHandler) expat_comment_handler
3232 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003233 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003234 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003235 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003236 (XML_ProcessingInstructionHandler) expat_pi_handler
3237 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003238 EXPAT(SetStartDoctypeDeclHandler)(
3239 self_xp->parser,
3240 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3241 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003242 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003243 self_xp->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003244 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003245 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003246
Eli Bendersky52467b12012-06-01 07:13:08 +03003247 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003248}
3249
Eli Bendersky52467b12012-06-01 07:13:08 +03003250static int
3251xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3252{
3253 Py_VISIT(self->handle_close);
3254 Py_VISIT(self->handle_pi);
3255 Py_VISIT(self->handle_comment);
3256 Py_VISIT(self->handle_end);
3257 Py_VISIT(self->handle_data);
3258 Py_VISIT(self->handle_start);
3259
3260 Py_VISIT(self->target);
3261 Py_VISIT(self->entity);
3262 Py_VISIT(self->names);
3263
3264 return 0;
3265}
3266
3267static int
3268xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003269{
3270 EXPAT(ParserFree)(self->parser);
3271
Antoine Pitrouc1948842012-10-01 23:40:37 +02003272 Py_CLEAR(self->handle_close);
3273 Py_CLEAR(self->handle_pi);
3274 Py_CLEAR(self->handle_comment);
3275 Py_CLEAR(self->handle_end);
3276 Py_CLEAR(self->handle_data);
3277 Py_CLEAR(self->handle_start);
3278 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003279
Antoine Pitrouc1948842012-10-01 23:40:37 +02003280 Py_CLEAR(self->target);
3281 Py_CLEAR(self->entity);
3282 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003283
Eli Bendersky52467b12012-06-01 07:13:08 +03003284 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003285}
3286
Eli Bendersky52467b12012-06-01 07:13:08 +03003287static void
3288xmlparser_dealloc(XMLParserObject* self)
3289{
3290 PyObject_GC_UnTrack(self);
3291 xmlparser_gc_clear(self);
3292 Py_TYPE(self)->tp_free((PyObject *)self);
3293}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003294
3295LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003296expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003297{
3298 int ok;
3299
3300 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3301
3302 if (PyErr_Occurred())
3303 return NULL;
3304
3305 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003306 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003307 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003308 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003309 EXPAT(GetErrorColumnNumber)(self->parser),
3310 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003311 );
3312 return NULL;
3313 }
3314
3315 Py_RETURN_NONE;
3316}
3317
3318static PyObject*
3319xmlparser_close(XMLParserObject* self, PyObject* args)
3320{
3321 /* end feeding data to parser */
3322
3323 PyObject* res;
3324 if (!PyArg_ParseTuple(args, ":close"))
3325 return NULL;
3326
3327 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003328 if (!res)
3329 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003330
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003331 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003332 Py_DECREF(res);
3333 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003334 } if (self->handle_close) {
3335 Py_DECREF(res);
3336 return PyObject_CallFunction(self->handle_close, "");
3337 } else
3338 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003339}
3340
3341static PyObject*
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003342xmlparser_feed(XMLParserObject* self, PyObject* arg)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003343{
3344 /* feed data to parser */
3345
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003346 if (PyUnicode_Check(arg)) {
3347 Py_ssize_t data_len;
3348 const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len);
3349 if (data == NULL)
3350 return NULL;
3351 if (data_len > INT_MAX) {
3352 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3353 return NULL;
3354 }
3355 /* Explicitly set UTF-8 encoding. Return code ignored. */
3356 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3357 return expat_parse(self, data, (int)data_len, 0);
3358 }
3359 else {
3360 Py_buffer view;
3361 PyObject *res;
3362 if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0)
3363 return NULL;
3364 if (view.len > INT_MAX) {
3365 PyBuffer_Release(&view);
3366 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3367 return NULL;
3368 }
3369 res = expat_parse(self, view.buf, (int)view.len, 0);
3370 PyBuffer_Release(&view);
3371 return res;
3372 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003373}
3374
3375static PyObject*
3376xmlparser_parse(XMLParserObject* self, PyObject* args)
3377{
3378 /* (internal) parse until end of input stream */
3379
3380 PyObject* reader;
3381 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003382 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003383 PyObject* res;
3384
3385 PyObject* fileobj;
3386 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3387 return NULL;
3388
3389 reader = PyObject_GetAttrString(fileobj, "read");
3390 if (!reader)
3391 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003392
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003393 /* read from open file object */
3394 for (;;) {
3395
3396 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3397
3398 if (!buffer) {
3399 /* read failed (e.g. due to KeyboardInterrupt) */
3400 Py_DECREF(reader);
3401 return NULL;
3402 }
3403
Eli Benderskyf996e772012-03-16 05:53:30 +02003404 if (PyUnicode_CheckExact(buffer)) {
3405 /* A unicode object is encoded into bytes using UTF-8 */
3406 if (PyUnicode_GET_SIZE(buffer) == 0) {
3407 Py_DECREF(buffer);
3408 break;
3409 }
3410 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003411 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003412 if (!temp) {
3413 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003414 Py_DECREF(reader);
3415 return NULL;
3416 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003417 buffer = temp;
3418 }
3419 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003420 Py_DECREF(buffer);
3421 break;
3422 }
3423
3424 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003425 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003426 );
3427
3428 Py_DECREF(buffer);
3429
3430 if (!res) {
3431 Py_DECREF(reader);
3432 return NULL;
3433 }
3434 Py_DECREF(res);
3435
3436 }
3437
3438 Py_DECREF(reader);
3439
3440 res = expat_parse(self, "", 0, 1);
3441
3442 if (res && TreeBuilder_CheckExact(self->target)) {
3443 Py_DECREF(res);
3444 return treebuilder_done((TreeBuilderObject*) self->target);
3445 }
3446
3447 return res;
3448}
3449
3450static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003451xmlparser_doctype(XMLParserObject *self, PyObject *args)
3452{
3453 Py_RETURN_NONE;
3454}
3455
3456static PyObject*
3457xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003458{
3459 /* activate element event reporting */
3460
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003461 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003462 TreeBuilderObject* target;
3463
3464 PyObject* events; /* event collector */
3465 PyObject* event_set = Py_None;
3466 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
3467 &event_set))
3468 return NULL;
3469
3470 if (!TreeBuilder_CheckExact(self->target)) {
3471 PyErr_SetString(
3472 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003473 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003474 "targets"
3475 );
3476 return NULL;
3477 }
3478
3479 target = (TreeBuilderObject*) self->target;
3480
3481 Py_INCREF(events);
3482 Py_XDECREF(target->events);
3483 target->events = events;
3484
3485 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003486 Py_CLEAR(target->start_event_obj);
3487 Py_CLEAR(target->end_event_obj);
3488 Py_CLEAR(target->start_ns_event_obj);
3489 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003490
3491 if (event_set == Py_None) {
3492 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003493 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003494 Py_RETURN_NONE;
3495 }
3496
3497 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3498 goto error;
3499
3500 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3501 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3502 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003503 if (PyUnicode_Check(item)) {
3504 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003505 if (event == NULL)
3506 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003507 } else if (PyBytes_Check(item))
3508 event = PyBytes_AS_STRING(item);
3509 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003510 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003511 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003512 if (strcmp(event, "start") == 0) {
3513 Py_INCREF(item);
3514 target->start_event_obj = item;
3515 } else if (strcmp(event, "end") == 0) {
3516 Py_INCREF(item);
3517 Py_XDECREF(target->end_event_obj);
3518 target->end_event_obj = item;
3519 } else if (strcmp(event, "start-ns") == 0) {
3520 Py_INCREF(item);
3521 Py_XDECREF(target->start_ns_event_obj);
3522 target->start_ns_event_obj = item;
3523 EXPAT(SetNamespaceDeclHandler)(
3524 self->parser,
3525 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3526 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3527 );
3528 } else if (strcmp(event, "end-ns") == 0) {
3529 Py_INCREF(item);
3530 Py_XDECREF(target->end_ns_event_obj);
3531 target->end_ns_event_obj = item;
3532 EXPAT(SetNamespaceDeclHandler)(
3533 self->parser,
3534 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3535 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3536 );
3537 } else {
3538 PyErr_Format(
3539 PyExc_ValueError,
3540 "unknown event '%s'", event
3541 );
3542 return NULL;
3543 }
3544 }
3545
3546 Py_RETURN_NONE;
3547
3548 error:
3549 PyErr_SetString(
3550 PyExc_TypeError,
3551 "invalid event tuple"
3552 );
3553 return NULL;
3554}
3555
3556static PyMethodDef xmlparser_methods[] = {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003557 {"feed", (PyCFunction) xmlparser_feed, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003558 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3559 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3560 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003561 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003562 {NULL, NULL}
3563};
3564
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003565static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003566xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003567{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003568 if (PyUnicode_Check(nameobj)) {
3569 PyObject* res;
3570 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3571 res = self->entity;
3572 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3573 res = self->target;
3574 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3575 return PyUnicode_FromFormat(
3576 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003577 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003578 }
3579 else
3580 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003581
Alexander Belopolskye239d232010-12-08 23:31:48 +00003582 Py_INCREF(res);
3583 return res;
3584 }
3585 generic:
3586 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003587}
3588
Neal Norwitz227b5332006-03-22 09:28:35 +00003589static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003590 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003591 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003592 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003593 (destructor)xmlparser_dealloc, /* tp_dealloc */
3594 0, /* tp_print */
3595 0, /* tp_getattr */
3596 0, /* tp_setattr */
3597 0, /* tp_reserved */
3598 0, /* tp_repr */
3599 0, /* tp_as_number */
3600 0, /* tp_as_sequence */
3601 0, /* tp_as_mapping */
3602 0, /* tp_hash */
3603 0, /* tp_call */
3604 0, /* tp_str */
3605 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3606 0, /* tp_setattro */
3607 0, /* tp_as_buffer */
3608 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3609 /* tp_flags */
3610 0, /* tp_doc */
3611 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3612 (inquiry)xmlparser_gc_clear, /* tp_clear */
3613 0, /* tp_richcompare */
3614 0, /* tp_weaklistoffset */
3615 0, /* tp_iter */
3616 0, /* tp_iternext */
3617 xmlparser_methods, /* tp_methods */
3618 0, /* tp_members */
3619 0, /* tp_getset */
3620 0, /* tp_base */
3621 0, /* tp_dict */
3622 0, /* tp_descr_get */
3623 0, /* tp_descr_set */
3624 0, /* tp_dictoffset */
3625 (initproc)xmlparser_init, /* tp_init */
3626 PyType_GenericAlloc, /* tp_alloc */
3627 xmlparser_new, /* tp_new */
3628 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003629};
3630
3631#endif
3632
3633/* ==================================================================== */
3634/* python module interface */
3635
3636static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003637 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003638 {NULL, NULL}
3639};
3640
Martin v. Löwis1a214512008-06-11 05:26:20 +00003641
3642static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003643 PyModuleDef_HEAD_INIT,
3644 "_elementtree",
3645 NULL,
3646 -1,
3647 _functions,
3648 NULL,
3649 NULL,
3650 NULL,
3651 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003652};
3653
Neal Norwitzf6657e62006-12-28 04:47:50 +00003654PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003655PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003656{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003657 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003658
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003659 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003660 if (PyType_Ready(&ElementIter_Type) < 0)
3661 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003662 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003663 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003664 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003665 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003666#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003667 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003668 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003669#endif
3670
Martin v. Löwis1a214512008-06-11 05:26:20 +00003671 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003672 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003673 return NULL;
3674
Eli Bendersky828efde2012-04-05 05:40:58 +03003675 if (!(temp = PyImport_ImportModule("copy")))
3676 return NULL;
3677 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3678 Py_XDECREF(temp);
3679
3680 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3681 return NULL;
3682
Eli Bendersky20d41742012-06-01 09:48:37 +03003683 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003684 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3685 if (expat_capi) {
3686 /* check that it's usable */
3687 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3688 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3689 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3690 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003691 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003692 PyErr_SetString(PyExc_ImportError,
3693 "pyexpat version is incompatible");
3694 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003695 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003696 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003697 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003698 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003699
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003700 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003701 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003702 );
3703 Py_INCREF(elementtree_parseerror_obj);
3704 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3705
Eli Bendersky092af1f2012-03-04 07:14:03 +02003706 Py_INCREF((PyObject *)&Element_Type);
3707 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3708
Eli Bendersky58d548d2012-05-29 15:45:16 +03003709 Py_INCREF((PyObject *)&TreeBuilder_Type);
3710 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3711
Eli Bendersky52467b12012-06-01 07:13:08 +03003712#if defined(USE_EXPAT)
3713 Py_INCREF((PyObject *)&XMLParser_Type);
3714 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3715#endif
3716
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003717 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003718}