blob: c179e965da5ea62ea20fb92b704e181818f6b8e0 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000053#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054
55/* -------------------------------------------------------------------- */
56/* configuration */
57
58/* Leave defined to include the expat-based XMLParser type */
59#define USE_EXPAT
60
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* An element can hold this many children without extra memory
62 allocations. */
63#define STATIC_CHILDREN 4
64
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
69
70/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010071 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000072 that the number of children should be an even number, at least on
73 32-bit platforms. */
74
75/* -------------------------------------------------------------------- */
76
/* Optional allocation tracing.  Change the condition below to 1 to keep a
   running byte total and print it on every ALLOC/RELEASE; in the default
   configuration both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
87
/* compiler tweaks: LOCAL(type) introduces a file-private definition of the
   given type; MSVC builds additionally ask for inlining and __fastcall */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
94
/* Tagged-pointer helpers: a 'join' flag is kept in the lowest bit of the
   text and tail object pointers.  Every use of text or tail as an object
   pointer must go through JOIN_OBJ; see the comments in the ElementObject
   definition for more info.

   JOIN_OBJ(p)       -> the object pointer with the flag bit masked off
   JOIN_GET(p)       -> the flag bit (0 or 1)
   JOIN_SET(p, flag) -> p's object pointer with the flag bit set to `flag` */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t) 1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & (Py_uintptr_t) 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000102
Ronald Oussoren138d0802013-07-19 11:11:25 +0200103/* Types defined by this extension */
104static PyTypeObject Element_Type;
105static PyTypeObject ElementIter_Type;
106static PyTypeObject TreeBuilder_Type;
107static PyTypeObject XMLParser_Type;
108
109
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000110/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000111static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000112static PyObject* elementtree_deepcopy_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000113static PyObject* elementpath_obj;
114
115/* helpers */
116
117LOCAL(PyObject*)
118deepcopy(PyObject* object, PyObject* memo)
119{
120 /* do a deep copy of the given object */
121
122 PyObject* args;
123 PyObject* result;
124
125 if (!elementtree_deepcopy_obj) {
126 PyErr_SetString(
127 PyExc_RuntimeError,
128 "deepcopy helper not found"
129 );
130 return NULL;
131 }
132
Antoine Pitrouc1948842012-10-01 23:40:37 +0200133 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000134 if (!args)
135 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000137 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138 return result;
139}
140
141LOCAL(PyObject*)
142list_join(PyObject* list)
143{
144 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000146 PyObject* result;
147
Antoine Pitrouc1948842012-10-01 23:40:37 +0200148 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 if (!joiner)
150 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200151 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200153 if (result)
154 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155 return result;
156}
157
Eli Bendersky48d358b2012-05-30 17:57:50 +0300158/* Is the given object an empty dictionary?
159*/
160static int
161is_empty_dict(PyObject *obj)
162{
163 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
164}
165
166
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000167/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200168/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000169
170typedef struct {
171
172 /* attributes (a dictionary object), or None if no attributes */
173 PyObject* attrib;
174
175 /* child elements */
176 int length; /* actual number of items */
177 int allocated; /* allocated items */
178
179 /* this either points to _children or to a malloced buffer */
180 PyObject* *children;
181
182 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100183
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000184} ElementObjectExtra;
185
186typedef struct {
187 PyObject_HEAD
188
189 /* element tag (a string). */
190 PyObject* tag;
191
192 /* text before first child. note that this is a tagged pointer;
193 use JOIN_OBJ to get the object pointer. the join flag is used
194 to distinguish lists created by the tree builder from lists
195 assigned to the attribute by application code; the former
196 should be joined before being returned to the user, the latter
197 should be left intact. */
198 PyObject* text;
199
200 /* text after this element, in parent. note that this is a tagged
201 pointer; use JOIN_OBJ to get the object pointer. */
202 PyObject* tail;
203
204 ElementObjectExtra* extra;
205
Eli Benderskyebf37a22012-04-03 22:02:37 +0300206 PyObject *weakreflist; /* For tp_weaklistoffset */
207
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000208} ElementObject;
209
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210
Christian Heimes90aa7642007-12-19 02:45:37 +0000211#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200214/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215
216LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200217create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000218{
219 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
220 if (!self->extra)
221 return -1;
222
223 if (!attrib)
224 attrib = Py_None;
225
226 Py_INCREF(attrib);
227 self->extra->attrib = attrib;
228
229 self->extra->length = 0;
230 self->extra->allocated = STATIC_CHILDREN;
231 self->extra->children = self->extra->_children;
232
233 return 0;
234}
235
236LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200237dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000238{
Eli Bendersky08b85292012-04-04 15:55:07 +0300239 ElementObjectExtra *myextra;
240 int i;
241
Eli Benderskyebf37a22012-04-03 22:02:37 +0300242 if (!self->extra)
243 return;
244
245 /* Avoid DECREFs calling into this code again (cycles, etc.)
246 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300247 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300248 self->extra = NULL;
249
250 Py_DECREF(myextra->attrib);
251
Eli Benderskyebf37a22012-04-03 22:02:37 +0300252 for (i = 0; i < myextra->length; i++)
253 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254
Eli Benderskyebf37a22012-04-03 22:02:37 +0300255 if (myextra->children != myextra->_children)
256 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000257
Eli Benderskyebf37a22012-04-03 22:02:37 +0300258 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000259}
260
Eli Bendersky092af1f2012-03-04 07:14:03 +0200261/* Convenience internal function to create new Element objects with the given
262 * tag and attributes.
263*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200265create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266{
267 ElementObject* self;
268
Eli Bendersky0192ba32012-03-30 16:38:33 +0300269 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000270 if (self == NULL)
271 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000272 self->extra = NULL;
273
Eli Bendersky48d358b2012-05-30 17:57:50 +0300274 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200275 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000276 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000277 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000278 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279 }
280
281 Py_INCREF(tag);
282 self->tag = tag;
283
284 Py_INCREF(Py_None);
285 self->text = Py_None;
286
287 Py_INCREF(Py_None);
288 self->tail = Py_None;
289
Eli Benderskyebf37a22012-04-03 22:02:37 +0300290 self->weakreflist = NULL;
291
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000292 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300293 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000294 return (PyObject*) self;
295}
296
Eli Bendersky092af1f2012-03-04 07:14:03 +0200297static PyObject *
298element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
299{
300 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
301 if (e != NULL) {
302 Py_INCREF(Py_None);
303 e->tag = Py_None;
304
305 Py_INCREF(Py_None);
306 e->text = Py_None;
307
308 Py_INCREF(Py_None);
309 e->tail = Py_None;
310
311 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300312 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200313 }
314 return (PyObject *)e;
315}
316
Eli Bendersky737b1732012-05-29 06:02:56 +0300317/* Helper function for extracting the attrib dictionary from a keywords dict.
318 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800319 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300320 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700321 *
322 * Return a dictionary with the content of kwds merged into the content of
323 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300324 */
325static PyObject*
326get_attrib_from_keywords(PyObject *kwds)
327{
328 PyObject *attrib_str = PyUnicode_FromString("attrib");
329 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
330
331 if (attrib) {
332 /* If attrib was found in kwds, copy its value and remove it from
333 * kwds
334 */
335 if (!PyDict_Check(attrib)) {
336 Py_DECREF(attrib_str);
337 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
338 Py_TYPE(attrib)->tp_name);
339 return NULL;
340 }
341 attrib = PyDict_Copy(attrib);
342 PyDict_DelItem(kwds, attrib_str);
343 } else {
344 attrib = PyDict_New();
345 }
346
347 Py_DECREF(attrib_str);
348
349 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200350 if (PyDict_Update(attrib, kwds) < 0)
351 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300352 return attrib;
353}
354
Eli Bendersky092af1f2012-03-04 07:14:03 +0200355static int
356element_init(PyObject *self, PyObject *args, PyObject *kwds)
357{
358 PyObject *tag;
359 PyObject *tmp;
360 PyObject *attrib = NULL;
361 ElementObject *self_elem;
362
363 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
364 return -1;
365
Eli Bendersky737b1732012-05-29 06:02:56 +0300366 if (attrib) {
367 /* attrib passed as positional arg */
368 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200369 if (!attrib)
370 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300371 if (kwds) {
372 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200373 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300374 return -1;
375 }
376 }
377 } else if (kwds) {
378 /* have keywords args */
379 attrib = get_attrib_from_keywords(kwds);
380 if (!attrib)
381 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200382 }
383
384 self_elem = (ElementObject *)self;
385
Antoine Pitrouc1948842012-10-01 23:40:37 +0200386 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200388 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 return -1;
390 }
391 }
392
Eli Bendersky48d358b2012-05-30 17:57:50 +0300393 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200394 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395
396 /* Replace the objects already pointed to by tag, text and tail. */
397 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200398 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200399 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 Py_DECREF(tmp);
401
402 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 Py_DECREF(JOIN_OBJ(tmp));
406
407 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200409 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 Py_DECREF(JOIN_OBJ(tmp));
411
412 return 0;
413}
414
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000415LOCAL(int)
416element_resize(ElementObject* self, int extra)
417{
418 int size;
419 PyObject* *children;
420
421 /* make sure self->children can hold the given number of extra
422 elements. set an exception and return -1 if allocation failed */
423
424 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200425 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000426
427 size = self->extra->length + extra;
428
429 if (size > self->extra->allocated) {
430 /* use Python 2.4's list growth strategy */
431 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000432 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100433 * which needs at least 4 bytes.
434 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000435 * be safe.
436 */
437 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000438 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000439 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100440 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000441 * false alarm always assume at least one child to be safe.
442 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000443 children = PyObject_Realloc(self->extra->children,
444 size * sizeof(PyObject*));
445 if (!children)
446 goto nomemory;
447 } else {
448 children = PyObject_Malloc(size * sizeof(PyObject*));
449 if (!children)
450 goto nomemory;
451 /* copy existing children from static area to malloc buffer */
452 memcpy(children, self->extra->children,
453 self->extra->length * sizeof(PyObject*));
454 }
455 self->extra->children = children;
456 self->extra->allocated = size;
457 }
458
459 return 0;
460
461 nomemory:
462 PyErr_NoMemory();
463 return -1;
464}
465
466LOCAL(int)
467element_add_subelement(ElementObject* self, PyObject* element)
468{
469 /* add a child element to a parent */
470
471 if (element_resize(self, 1) < 0)
472 return -1;
473
474 Py_INCREF(element);
475 self->extra->children[self->extra->length] = element;
476
477 self->extra->length++;
478
479 return 0;
480}
481
482LOCAL(PyObject*)
483element_get_attrib(ElementObject* self)
484{
485 /* return borrowed reference to attrib dictionary */
486 /* note: this function assumes that the extra section exists */
487
488 PyObject* res = self->extra->attrib;
489
490 if (res == Py_None) {
491 /* create missing dictionary */
492 res = PyDict_New();
493 if (!res)
494 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200495 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000496 self->extra->attrib = res;
497 }
498
499 return res;
500}
501
502LOCAL(PyObject*)
503element_get_text(ElementObject* self)
504{
505 /* return borrowed reference to text attribute */
506
507 PyObject* res = self->text;
508
509 if (JOIN_GET(res)) {
510 res = JOIN_OBJ(res);
511 if (PyList_CheckExact(res)) {
512 res = list_join(res);
513 if (!res)
514 return NULL;
515 self->text = res;
516 }
517 }
518
519 return res;
520}
521
522LOCAL(PyObject*)
523element_get_tail(ElementObject* self)
524{
525 /* return borrowed reference to text attribute */
526
527 PyObject* res = self->tail;
528
529 if (JOIN_GET(res)) {
530 res = JOIN_OBJ(res);
531 if (PyList_CheckExact(res)) {
532 res = list_join(res);
533 if (!res)
534 return NULL;
535 self->tail = res;
536 }
537 }
538
539 return res;
540}
541
542static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300543subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000544{
545 PyObject* elem;
546
547 ElementObject* parent;
548 PyObject* tag;
549 PyObject* attrib = NULL;
550 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
551 &Element_Type, &parent, &tag,
552 &PyDict_Type, &attrib))
553 return NULL;
554
Eli Bendersky737b1732012-05-29 06:02:56 +0300555 if (attrib) {
556 /* attrib passed as positional arg */
557 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000558 if (!attrib)
559 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300560 if (kwds) {
561 if (PyDict_Update(attrib, kwds) < 0) {
562 return NULL;
563 }
564 }
565 } else if (kwds) {
566 /* have keyword args */
567 attrib = get_attrib_from_keywords(kwds);
568 if (!attrib)
569 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000570 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300571 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000572 Py_INCREF(Py_None);
573 attrib = Py_None;
574 }
575
Eli Bendersky092af1f2012-03-04 07:14:03 +0200576 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577
578 Py_DECREF(attrib);
579
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000580 if (element_add_subelement(parent, elem) < 0) {
581 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000582 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000583 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000584
585 return elem;
586}
587
Eli Bendersky0192ba32012-03-30 16:38:33 +0300588static int
589element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
590{
591 Py_VISIT(self->tag);
592 Py_VISIT(JOIN_OBJ(self->text));
593 Py_VISIT(JOIN_OBJ(self->tail));
594
595 if (self->extra) {
596 int i;
597 Py_VISIT(self->extra->attrib);
598
599 for (i = 0; i < self->extra->length; ++i)
600 Py_VISIT(self->extra->children[i]);
601 }
602 return 0;
603}
604
605static int
606element_gc_clear(ElementObject *self)
607{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300608 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300609
610 /* The following is like Py_CLEAR for self->text and self->tail, but
611 * written explicitily because the real pointers hide behind access
612 * macros.
613 */
614 if (self->text) {
615 PyObject *tmp = JOIN_OBJ(self->text);
616 self->text = NULL;
617 Py_DECREF(tmp);
618 }
619
620 if (self->tail) {
621 PyObject *tmp = JOIN_OBJ(self->tail);
622 self->tail = NULL;
623 Py_DECREF(tmp);
624 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300625
626 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300627 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300628 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300629 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300630 return 0;
631}
632
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000633static void
634element_dealloc(ElementObject* self)
635{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300637
638 if (self->weakreflist != NULL)
639 PyObject_ClearWeakRefs((PyObject *) self);
640
Eli Bendersky0192ba32012-03-30 16:38:33 +0300641 /* element_gc_clear clears all references and deallocates extra
642 */
643 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000644
645 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200646 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000647}
648
649/* -------------------------------------------------------------------- */
650/* methods (in alphabetical order) */
651
652static PyObject*
653element_append(ElementObject* self, PyObject* args)
654{
655 PyObject* element;
656 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
657 return NULL;
658
659 if (element_add_subelement(self, element) < 0)
660 return NULL;
661
662 Py_RETURN_NONE;
663}
664
665static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300666element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000667{
668 if (!PyArg_ParseTuple(args, ":clear"))
669 return NULL;
670
Eli Benderskyebf37a22012-04-03 22:02:37 +0300671 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000672
673 Py_INCREF(Py_None);
674 Py_DECREF(JOIN_OBJ(self->text));
675 self->text = Py_None;
676
677 Py_INCREF(Py_None);
678 Py_DECREF(JOIN_OBJ(self->tail));
679 self->tail = Py_None;
680
681 Py_RETURN_NONE;
682}
683
684static PyObject*
685element_copy(ElementObject* self, PyObject* args)
686{
687 int i;
688 ElementObject* element;
689
690 if (!PyArg_ParseTuple(args, ":__copy__"))
691 return NULL;
692
Eli Bendersky092af1f2012-03-04 07:14:03 +0200693 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000694 self->tag, (self->extra) ? self->extra->attrib : Py_None
695 );
696 if (!element)
697 return NULL;
698
699 Py_DECREF(JOIN_OBJ(element->text));
700 element->text = self->text;
701 Py_INCREF(JOIN_OBJ(element->text));
702
703 Py_DECREF(JOIN_OBJ(element->tail));
704 element->tail = self->tail;
705 Py_INCREF(JOIN_OBJ(element->tail));
706
707 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100708
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000709 if (element_resize(element, self->extra->length) < 0) {
710 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000711 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000712 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713
714 for (i = 0; i < self->extra->length; i++) {
715 Py_INCREF(self->extra->children[i]);
716 element->extra->children[i] = self->extra->children[i];
717 }
718
719 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100720
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000721 }
722
723 return (PyObject*) element;
724}
725
726static PyObject*
727element_deepcopy(ElementObject* self, PyObject* args)
728{
729 int i;
730 ElementObject* element;
731 PyObject* tag;
732 PyObject* attrib;
733 PyObject* text;
734 PyObject* tail;
735 PyObject* id;
736
737 PyObject* memo;
738 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
739 return NULL;
740
741 tag = deepcopy(self->tag, memo);
742 if (!tag)
743 return NULL;
744
745 if (self->extra) {
746 attrib = deepcopy(self->extra->attrib, memo);
747 if (!attrib) {
748 Py_DECREF(tag);
749 return NULL;
750 }
751 } else {
752 Py_INCREF(Py_None);
753 attrib = Py_None;
754 }
755
Eli Bendersky092af1f2012-03-04 07:14:03 +0200756 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000757
758 Py_DECREF(tag);
759 Py_DECREF(attrib);
760
761 if (!element)
762 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100763
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 text = deepcopy(JOIN_OBJ(self->text), memo);
765 if (!text)
766 goto error;
767 Py_DECREF(element->text);
768 element->text = JOIN_SET(text, JOIN_GET(self->text));
769
770 tail = deepcopy(JOIN_OBJ(self->tail), memo);
771 if (!tail)
772 goto error;
773 Py_DECREF(element->tail);
774 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
775
776 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100777
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000778 if (element_resize(element, self->extra->length) < 0)
779 goto error;
780
781 for (i = 0; i < self->extra->length; i++) {
782 PyObject* child = deepcopy(self->extra->children[i], memo);
783 if (!child) {
784 element->extra->length = i;
785 goto error;
786 }
787 element->extra->children[i] = child;
788 }
789
790 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100791
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000792 }
793
794 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200795 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000796 if (!id)
797 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000798
799 i = PyDict_SetItem(memo, id, (PyObject*) element);
800
801 Py_DECREF(id);
802
803 if (i < 0)
804 goto error;
805
806 return (PyObject*) element;
807
808 error:
809 Py_DECREF(element);
810 return NULL;
811}
812
Martin v. Löwisbce16662012-06-17 10:41:22 +0200813static PyObject*
814element_sizeof(PyObject* _self, PyObject* args)
815{
816 ElementObject *self = (ElementObject*)_self;
817 Py_ssize_t result = sizeof(ElementObject);
818 if (self->extra) {
819 result += sizeof(ElementObjectExtra);
820 if (self->extra->children != self->extra->_children)
821 result += sizeof(PyObject*) * self->extra->allocated;
822 }
823 return PyLong_FromSsize_t(result);
824}
825
/* Keys of the state dictionary exchanged by getstate/setstate. */
#define PICKLED_TAG       "tag"
#define PICKLED_ATTRIB    "attrib"
#define PICKLED_TEXT      "text"
#define PICKLED_TAIL      "tail"
#define PICKLED_CHILDREN  "_children"
832
833/* __getstate__ returns a fabricated instance dict as in the pure-Python
834 * Element implementation, for interoperability/interchangeability. This
835 * makes the pure-Python implementation details an API, but (a) there aren't
836 * any unnecessary structures there; and (b) it buys compatibility with 3.2
837 * pickles. See issue #16076.
838 */
839static PyObject *
840element_getstate(ElementObject *self)
841{
842 int i, noattrib;
843 PyObject *instancedict = NULL, *children;
844
845 /* Build a list of children. */
846 children = PyList_New(self->extra ? self->extra->length : 0);
847 if (!children)
848 return NULL;
849 for (i = 0; i < PyList_GET_SIZE(children); i++) {
850 PyObject *child = self->extra->children[i];
851 Py_INCREF(child);
852 PyList_SET_ITEM(children, i, child);
853 }
854
855 /* Construct the state object. */
856 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
857 if (noattrib)
858 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
859 PICKLED_TAG, self->tag,
860 PICKLED_CHILDREN, children,
861 PICKLED_ATTRIB,
862 PICKLED_TEXT, self->text,
863 PICKLED_TAIL, self->tail);
864 else
865 instancedict = Py_BuildValue("{sOsOsOsOsO}",
866 PICKLED_TAG, self->tag,
867 PICKLED_CHILDREN, children,
868 PICKLED_ATTRIB, self->extra->attrib,
869 PICKLED_TEXT, self->text,
870 PICKLED_TAIL, self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800871 if (instancedict) {
872 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800873 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800874 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800875 else {
876 for (i = 0; i < PyList_GET_SIZE(children); i++)
877 Py_DECREF(PyList_GET_ITEM(children, i));
878 Py_DECREF(children);
879
880 return NULL;
881 }
882}
883
884static PyObject *
885element_setstate_from_attributes(ElementObject *self,
886 PyObject *tag,
887 PyObject *attrib,
888 PyObject *text,
889 PyObject *tail,
890 PyObject *children)
891{
892 Py_ssize_t i, nchildren;
893
894 if (!tag) {
895 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
896 return NULL;
897 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800898
899 Py_CLEAR(self->tag);
900 self->tag = tag;
901 Py_INCREF(self->tag);
902
903 Py_CLEAR(self->text);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800904 self->text = text ? text : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800905 Py_INCREF(self->text);
906
907 Py_CLEAR(self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800908 self->tail = tail ? tail : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800909 Py_INCREF(self->tail);
910
911 /* Handle ATTRIB and CHILDREN. */
912 if (!children && !attrib)
913 Py_RETURN_NONE;
914
915 /* Compute 'nchildren'. */
916 if (children) {
917 if (!PyList_Check(children)) {
918 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
919 return NULL;
920 }
921 nchildren = PyList_Size(children);
922 }
923 else {
924 nchildren = 0;
925 }
926
927 /* Allocate 'extra'. */
928 if (element_resize(self, nchildren)) {
929 return NULL;
930 }
931 assert(self->extra && self->extra->allocated >= nchildren);
932
933 /* Copy children */
934 for (i = 0; i < nchildren; i++) {
935 self->extra->children[i] = PyList_GET_ITEM(children, i);
936 Py_INCREF(self->extra->children[i]);
937 }
938
939 self->extra->length = nchildren;
940 self->extra->allocated = nchildren;
941
942 /* Stash attrib. */
943 if (attrib) {
944 Py_CLEAR(self->extra->attrib);
945 self->extra->attrib = attrib;
946 Py_INCREF(attrib);
947 }
948
949 Py_RETURN_NONE;
950}
951
952/* __setstate__ for Element instance from the Python implementation.
953 * 'state' should be the instance dict.
954 */
955static PyObject *
956element_setstate_from_Python(ElementObject *self, PyObject *state)
957{
958 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
959 PICKLED_TAIL, PICKLED_CHILDREN, 0};
960 PyObject *args;
961 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800962 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800963
Eli Bendersky698bdb22013-01-10 06:01:06 -0800964 tag = attrib = text = tail = children = NULL;
965 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800966 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800967 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800968
969 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
970 &attrib, &text, &tail, &children))
971 retval = element_setstate_from_attributes(self, tag, attrib, text,
972 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800973 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800974 retval = NULL;
975
976 Py_DECREF(args);
977 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800978}
979
980static PyObject *
981element_setstate(ElementObject *self, PyObject *state)
982{
983 if (!PyDict_CheckExact(state)) {
984 PyErr_Format(PyExc_TypeError,
985 "Don't know how to unpickle \"%.200R\" as an Element",
986 state);
987 return NULL;
988 }
989 else
990 return element_setstate_from_Python(self, state);
991}
992
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000993LOCAL(int)
994checkpath(PyObject* tag)
995{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000996 Py_ssize_t i;
997 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000998
999 /* check if a tag contains an xpath character */
1000
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001001#define PATHCHAR(ch) \
1002 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001003
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001004 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1006 void *data = PyUnicode_DATA(tag);
1007 unsigned int kind = PyUnicode_KIND(tag);
1008 for (i = 0; i < len; i++) {
1009 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1010 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001011 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001012 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001013 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001014 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001015 return 1;
1016 }
1017 return 0;
1018 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001019 if (PyBytes_Check(tag)) {
1020 char *p = PyBytes_AS_STRING(tag);
1021 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001022 if (p[i] == '{')
1023 check = 0;
1024 else if (p[i] == '}')
1025 check = 1;
1026 else if (check && PATHCHAR(p[i]))
1027 return 1;
1028 }
1029 return 0;
1030 }
1031
1032 return 1; /* unknown type; might be path expression */
1033}
1034
1035static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001036element_extend(ElementObject* self, PyObject* args)
1037{
1038 PyObject* seq;
1039 Py_ssize_t i, seqlen = 0;
1040
1041 PyObject* seq_in;
1042 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1043 return NULL;
1044
1045 seq = PySequence_Fast(seq_in, "");
1046 if (!seq) {
1047 PyErr_Format(
1048 PyExc_TypeError,
1049 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1050 );
1051 return NULL;
1052 }
1053
1054 seqlen = PySequence_Size(seq);
1055 for (i = 0; i < seqlen; i++) {
1056 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001057 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1058 Py_DECREF(seq);
1059 PyErr_Format(
1060 PyExc_TypeError,
1061 "expected an Element, not \"%.200s\"",
1062 Py_TYPE(element)->tp_name);
1063 return NULL;
1064 }
1065
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001066 if (element_add_subelement(self, element) < 0) {
1067 Py_DECREF(seq);
1068 return NULL;
1069 }
1070 }
1071
1072 Py_DECREF(seq);
1073
1074 Py_RETURN_NONE;
1075}
1076
1077static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001078element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001079{
1080 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001082 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001083 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001084
Eli Bendersky737b1732012-05-29 06:02:56 +03001085 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1086 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001087 return NULL;
1088
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001089 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001090 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001091 return _PyObject_CallMethodId(
1092 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001093 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001094 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001095
1096 if (!self->extra)
1097 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001098
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001099 for (i = 0; i < self->extra->length; i++) {
1100 PyObject* item = self->extra->children[i];
1101 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001102 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001103 Py_INCREF(item);
1104 return item;
1105 }
1106 }
1107
1108 Py_RETURN_NONE;
1109}
1110
1111static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001112element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001113{
1114 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001115 PyObject* tag;
1116 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001117 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001118 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001119 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001120
Eli Bendersky737b1732012-05-29 06:02:56 +03001121 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1122 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001123 return NULL;
1124
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001125 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001126 return _PyObject_CallMethodId(
1127 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001128 );
1129
1130 if (!self->extra) {
1131 Py_INCREF(default_value);
1132 return default_value;
1133 }
1134
1135 for (i = 0; i < self->extra->length; i++) {
1136 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +00001137 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
1138
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001139 PyObject* text = element_get_text(item);
1140 if (text == Py_None)
Eli Bendersky25771b32013-01-13 05:26:07 -08001141 return PyUnicode_New(0, 0);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001142 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001143 return text;
1144 }
1145 }
1146
1147 Py_INCREF(default_value);
1148 return default_value;
1149}
1150
1151static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001152element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001153{
1154 int i;
1155 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001156 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001157 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001158 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001159
Eli Bendersky737b1732012-05-29 06:02:56 +03001160 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1161 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001162 return NULL;
1163
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001164 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001165 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001166 return _PyObject_CallMethodId(
1167 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001168 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001169 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001170
1171 out = PyList_New(0);
1172 if (!out)
1173 return NULL;
1174
1175 if (!self->extra)
1176 return out;
1177
1178 for (i = 0; i < self->extra->length; i++) {
1179 PyObject* item = self->extra->children[i];
1180 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001181 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182 if (PyList_Append(out, item) < 0) {
1183 Py_DECREF(out);
1184 return NULL;
1185 }
1186 }
1187 }
1188
1189 return out;
1190}
1191
1192static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001193element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001194{
1195 PyObject* tag;
1196 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001197 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001198 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001199
Eli Bendersky737b1732012-05-29 06:02:56 +03001200 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1201 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001202 return NULL;
1203
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001204 return _PyObject_CallMethodId(
1205 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001206 );
1207}
1208
1209static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001210element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001211{
1212 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001213 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001214
1215 PyObject* key;
1216 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001217
1218 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1219 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001220 return NULL;
1221
1222 if (!self->extra || self->extra->attrib == Py_None)
1223 value = default_value;
1224 else {
1225 value = PyDict_GetItem(self->extra->attrib, key);
1226 if (!value)
1227 value = default_value;
1228 }
1229
1230 Py_INCREF(value);
1231 return value;
1232}
1233
1234static PyObject*
1235element_getchildren(ElementObject* self, PyObject* args)
1236{
1237 int i;
1238 PyObject* list;
1239
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001240 /* FIXME: report as deprecated? */
1241
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001242 if (!PyArg_ParseTuple(args, ":getchildren"))
1243 return NULL;
1244
1245 if (!self->extra)
1246 return PyList_New(0);
1247
1248 list = PyList_New(self->extra->length);
1249 if (!list)
1250 return NULL;
1251
1252 for (i = 0; i < self->extra->length; i++) {
1253 PyObject* item = self->extra->children[i];
1254 Py_INCREF(item);
1255 PyList_SET_ITEM(list, i, item);
1256 }
1257
1258 return list;
1259}
1260
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001261
Eli Bendersky64d11e62012-06-15 07:42:50 +03001262static PyObject *
1263create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1264
1265
1266static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001267element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001268{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001269 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001270 static char* kwlist[] = {"tag", 0};
1271
1272 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001273 return NULL;
1274
Eli Bendersky64d11e62012-06-15 07:42:50 +03001275 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001276}
1277
1278
1279static PyObject*
1280element_itertext(ElementObject* self, PyObject* args)
1281{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001282 if (!PyArg_ParseTuple(args, ":itertext"))
1283 return NULL;
1284
Eli Bendersky64d11e62012-06-15 07:42:50 +03001285 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001286}
1287
Eli Bendersky64d11e62012-06-15 07:42:50 +03001288
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001289static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001290element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001291{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001292 ElementObject* self = (ElementObject*) self_;
1293
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001294 if (!self->extra || index < 0 || index >= self->extra->length) {
1295 PyErr_SetString(
1296 PyExc_IndexError,
1297 "child index out of range"
1298 );
1299 return NULL;
1300 }
1301
1302 Py_INCREF(self->extra->children[index]);
1303 return self->extra->children[index];
1304}
1305
1306static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001307element_insert(ElementObject* self, PyObject* args)
1308{
1309 int i;
1310
1311 int index;
1312 PyObject* element;
1313 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1314 &Element_Type, &element))
1315 return NULL;
1316
1317 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001318 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001319
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001320 if (index < 0) {
1321 index += self->extra->length;
1322 if (index < 0)
1323 index = 0;
1324 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001325 if (index > self->extra->length)
1326 index = self->extra->length;
1327
1328 if (element_resize(self, 1) < 0)
1329 return NULL;
1330
1331 for (i = self->extra->length; i > index; i--)
1332 self->extra->children[i] = self->extra->children[i-1];
1333
1334 Py_INCREF(element);
1335 self->extra->children[index] = element;
1336
1337 self->extra->length++;
1338
1339 Py_RETURN_NONE;
1340}
1341
1342static PyObject*
1343element_items(ElementObject* self, PyObject* args)
1344{
1345 if (!PyArg_ParseTuple(args, ":items"))
1346 return NULL;
1347
1348 if (!self->extra || self->extra->attrib == Py_None)
1349 return PyList_New(0);
1350
1351 return PyDict_Items(self->extra->attrib);
1352}
1353
1354static PyObject*
1355element_keys(ElementObject* self, PyObject* args)
1356{
1357 if (!PyArg_ParseTuple(args, ":keys"))
1358 return NULL;
1359
1360 if (!self->extra || self->extra->attrib == Py_None)
1361 return PyList_New(0);
1362
1363 return PyDict_Keys(self->extra->attrib);
1364}
1365
Martin v. Löwis18e16552006-02-15 17:27:45 +00001366static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001367element_length(ElementObject* self)
1368{
1369 if (!self->extra)
1370 return 0;
1371
1372 return self->extra->length;
1373}
1374
1375static PyObject*
1376element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1377{
1378 PyObject* elem;
1379
1380 PyObject* tag;
1381 PyObject* attrib;
1382 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1383 return NULL;
1384
1385 attrib = PyDict_Copy(attrib);
1386 if (!attrib)
1387 return NULL;
1388
Eli Bendersky092af1f2012-03-04 07:14:03 +02001389 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001390
1391 Py_DECREF(attrib);
1392
1393 return elem;
1394}
1395
1396static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001397element_remove(ElementObject* self, PyObject* args)
1398{
1399 int i;
1400
1401 PyObject* element;
1402 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1403 return NULL;
1404
1405 if (!self->extra) {
1406 /* element has no children, so raise exception */
1407 PyErr_SetString(
1408 PyExc_ValueError,
1409 "list.remove(x): x not in list"
1410 );
1411 return NULL;
1412 }
1413
1414 for (i = 0; i < self->extra->length; i++) {
1415 if (self->extra->children[i] == element)
1416 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001417 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001418 break;
1419 }
1420
1421 if (i == self->extra->length) {
1422 /* element is not in children, so raise exception */
1423 PyErr_SetString(
1424 PyExc_ValueError,
1425 "list.remove(x): x not in list"
1426 );
1427 return NULL;
1428 }
1429
1430 Py_DECREF(self->extra->children[i]);
1431
1432 self->extra->length--;
1433
1434 for (; i < self->extra->length; i++)
1435 self->extra->children[i] = self->extra->children[i+1];
1436
1437 Py_RETURN_NONE;
1438}
1439
1440static PyObject*
1441element_repr(ElementObject* self)
1442{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001443 if (self->tag)
1444 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1445 else
1446 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001447}
1448
1449static PyObject*
1450element_set(ElementObject* self, PyObject* args)
1451{
1452 PyObject* attrib;
1453
1454 PyObject* key;
1455 PyObject* value;
1456 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1457 return NULL;
1458
1459 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001460 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001461
1462 attrib = element_get_attrib(self);
1463 if (!attrib)
1464 return NULL;
1465
1466 if (PyDict_SetItem(attrib, key, value) < 0)
1467 return NULL;
1468
1469 Py_RETURN_NONE;
1470}
1471
1472static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001473element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001474{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001475 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001476 int i;
1477 PyObject* old;
1478
1479 if (!self->extra || index < 0 || index >= self->extra->length) {
1480 PyErr_SetString(
1481 PyExc_IndexError,
1482 "child assignment index out of range");
1483 return -1;
1484 }
1485
1486 old = self->extra->children[index];
1487
1488 if (item) {
1489 Py_INCREF(item);
1490 self->extra->children[index] = item;
1491 } else {
1492 self->extra->length--;
1493 for (i = index; i < self->extra->length; i++)
1494 self->extra->children[i] = self->extra->children[i+1];
1495 }
1496
1497 Py_DECREF(old);
1498
1499 return 0;
1500}
1501
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001502static PyObject*
1503element_subscr(PyObject* self_, PyObject* item)
1504{
1505 ElementObject* self = (ElementObject*) self_;
1506
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001507 if (PyIndex_Check(item)) {
1508 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001509
1510 if (i == -1 && PyErr_Occurred()) {
1511 return NULL;
1512 }
1513 if (i < 0 && self->extra)
1514 i += self->extra->length;
1515 return element_getitem(self_, i);
1516 }
1517 else if (PySlice_Check(item)) {
1518 Py_ssize_t start, stop, step, slicelen, cur, i;
1519 PyObject* list;
1520
1521 if (!self->extra)
1522 return PyList_New(0);
1523
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001524 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001525 self->extra->length,
1526 &start, &stop, &step, &slicelen) < 0) {
1527 return NULL;
1528 }
1529
1530 if (slicelen <= 0)
1531 return PyList_New(0);
1532 else {
1533 list = PyList_New(slicelen);
1534 if (!list)
1535 return NULL;
1536
1537 for (cur = start, i = 0; i < slicelen;
1538 cur += step, i++) {
1539 PyObject* item = self->extra->children[cur];
1540 Py_INCREF(item);
1541 PyList_SET_ITEM(list, i, item);
1542 }
1543
1544 return list;
1545 }
1546 }
1547 else {
1548 PyErr_SetString(PyExc_TypeError,
1549 "element indices must be integers");
1550 return NULL;
1551 }
1552}
1553
1554static int
1555element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1556{
1557 ElementObject* self = (ElementObject*) self_;
1558
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001559 if (PyIndex_Check(item)) {
1560 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001561
1562 if (i == -1 && PyErr_Occurred()) {
1563 return -1;
1564 }
1565 if (i < 0 && self->extra)
1566 i += self->extra->length;
1567 return element_setitem(self_, i, value);
1568 }
1569 else if (PySlice_Check(item)) {
1570 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1571
1572 PyObject* recycle = NULL;
1573 PyObject* seq = NULL;
1574
1575 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001576 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001577
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001578 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001579 self->extra->length,
1580 &start, &stop, &step, &slicelen) < 0) {
1581 return -1;
1582 }
1583
Eli Bendersky865756a2012-03-09 13:38:15 +02001584 if (value == NULL) {
1585 /* Delete slice */
1586 size_t cur;
1587 Py_ssize_t i;
1588
1589 if (slicelen <= 0)
1590 return 0;
1591
1592 /* Since we're deleting, the direction of the range doesn't matter,
1593 * so for simplicity make it always ascending.
1594 */
1595 if (step < 0) {
1596 stop = start + 1;
1597 start = stop + step * (slicelen - 1) - 1;
1598 step = -step;
1599 }
1600
1601 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1602
1603 /* recycle is a list that will contain all the children
1604 * scheduled for removal.
1605 */
1606 if (!(recycle = PyList_New(slicelen))) {
1607 PyErr_NoMemory();
1608 return -1;
1609 }
1610
1611 /* This loop walks over all the children that have to be deleted,
1612 * with cur pointing at them. num_moved is the amount of children
1613 * until the next deleted child that have to be "shifted down" to
1614 * occupy the deleted's places.
1615 * Note that in the ith iteration, shifting is done i+i places down
1616 * because i children were already removed.
1617 */
1618 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1619 /* Compute how many children have to be moved, clipping at the
1620 * list end.
1621 */
1622 Py_ssize_t num_moved = step - 1;
1623 if (cur + step >= (size_t)self->extra->length) {
1624 num_moved = self->extra->length - cur - 1;
1625 }
1626
1627 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1628
1629 memmove(
1630 self->extra->children + cur - i,
1631 self->extra->children + cur + 1,
1632 num_moved * sizeof(PyObject *));
1633 }
1634
1635 /* Leftover "tail" after the last removed child */
1636 cur = start + (size_t)slicelen * step;
1637 if (cur < (size_t)self->extra->length) {
1638 memmove(
1639 self->extra->children + cur - slicelen,
1640 self->extra->children + cur,
1641 (self->extra->length - cur) * sizeof(PyObject *));
1642 }
1643
1644 self->extra->length -= slicelen;
1645
1646 /* Discard the recycle list with all the deleted sub-elements */
1647 Py_XDECREF(recycle);
1648 return 0;
1649 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001650 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001651 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001652 seq = PySequence_Fast(value, "");
1653 if (!seq) {
1654 PyErr_Format(
1655 PyExc_TypeError,
1656 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1657 );
1658 return -1;
1659 }
1660 newlen = PySequence_Size(seq);
1661 }
1662
1663 if (step != 1 && newlen != slicelen)
1664 {
1665 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001666 "attempt to assign sequence of size %zd "
1667 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001668 newlen, slicelen
1669 );
1670 return -1;
1671 }
1672
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001673 /* Resize before creating the recycle bin, to prevent refleaks. */
1674 if (newlen > slicelen) {
1675 if (element_resize(self, newlen - slicelen) < 0) {
1676 if (seq) {
1677 Py_DECREF(seq);
1678 }
1679 return -1;
1680 }
1681 }
1682
1683 if (slicelen > 0) {
1684 /* to avoid recursive calls to this method (via decref), move
1685 old items to the recycle bin here, and get rid of them when
1686 we're done modifying the element */
1687 recycle = PyList_New(slicelen);
1688 if (!recycle) {
1689 if (seq) {
1690 Py_DECREF(seq);
1691 }
1692 return -1;
1693 }
1694 for (cur = start, i = 0; i < slicelen;
1695 cur += step, i++)
1696 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1697 }
1698
1699 if (newlen < slicelen) {
1700 /* delete slice */
1701 for (i = stop; i < self->extra->length; i++)
1702 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1703 } else if (newlen > slicelen) {
1704 /* insert slice */
1705 for (i = self->extra->length-1; i >= stop; i--)
1706 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1707 }
1708
1709 /* replace the slice */
1710 for (cur = start, i = 0; i < newlen;
1711 cur += step, i++) {
1712 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1713 Py_INCREF(element);
1714 self->extra->children[cur] = element;
1715 }
1716
1717 self->extra->length += newlen - slicelen;
1718
1719 if (seq) {
1720 Py_DECREF(seq);
1721 }
1722
1723 /* discard the recycle bin, and everything in it */
1724 Py_XDECREF(recycle);
1725
1726 return 0;
1727 }
1728 else {
1729 PyErr_SetString(PyExc_TypeError,
1730 "element indices must be integers");
1731 return -1;
1732 }
1733}
1734
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001735static PyMethodDef element_methods[] = {
1736
Eli Bendersky0192ba32012-03-30 16:38:33 +03001737 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001738
Eli Benderskya8736902013-01-05 06:26:39 -08001739 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001740 {"set", (PyCFunction) element_set, METH_VARARGS},
1741
Eli Bendersky737b1732012-05-29 06:02:56 +03001742 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1743 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1744 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001745
1746 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001747 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001748 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1749 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1750
Eli Benderskya8736902013-01-05 06:26:39 -08001751 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001752 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001753 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001754
Eli Benderskya8736902013-01-05 06:26:39 -08001755 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001756 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1757
1758 {"items", (PyCFunction) element_items, METH_VARARGS},
1759 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1760
1761 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1762
1763 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1764 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001765 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001766 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1767 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001768
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001769 {NULL, NULL}
1770};
1771
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001772static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001773element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001774{
1775 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001776 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001777
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001778 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001779 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001780
Alexander Belopolskye239d232010-12-08 23:31:48 +00001781 if (name == NULL)
1782 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001783
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001784 /* handle common attributes first */
1785 if (strcmp(name, "tag") == 0) {
1786 res = self->tag;
1787 Py_INCREF(res);
1788 return res;
1789 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001790 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001791 Py_INCREF(res);
1792 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001793 }
1794
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001795 /* methods */
1796 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1797 if (res)
1798 return res;
1799
1800 /* less common attributes */
1801 if (strcmp(name, "tail") == 0) {
1802 PyErr_Clear();
1803 res = element_get_tail(self);
1804 } else if (strcmp(name, "attrib") == 0) {
1805 PyErr_Clear();
1806 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001807 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001808 res = element_get_attrib(self);
1809 }
1810
1811 if (!res)
1812 return NULL;
1813
1814 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001815 return res;
1816}
1817
Eli Benderskyef9683b2013-05-18 07:52:34 -07001818static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001819element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001820{
Eli Benderskyb20df952012-05-20 06:33:29 +03001821 char *name = "";
1822 if (PyUnicode_Check(nameobj))
1823 name = _PyUnicode_AsString(nameobj);
1824
Eli Benderskyef9683b2013-05-18 07:52:34 -07001825 if (name == NULL) {
1826 return -1;
1827 } else if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001828 Py_DECREF(self->tag);
1829 self->tag = value;
1830 Py_INCREF(self->tag);
1831 } else if (strcmp(name, "text") == 0) {
1832 Py_DECREF(JOIN_OBJ(self->text));
1833 self->text = value;
1834 Py_INCREF(self->text);
1835 } else if (strcmp(name, "tail") == 0) {
1836 Py_DECREF(JOIN_OBJ(self->tail));
1837 self->tail = value;
1838 Py_INCREF(self->tail);
1839 } else if (strcmp(name, "attrib") == 0) {
1840 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001841 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001842 Py_DECREF(self->extra->attrib);
1843 self->extra->attrib = value;
1844 Py_INCREF(self->extra->attrib);
1845 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001846 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001847 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001848 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001849 }
1850
Eli Benderskyef9683b2013-05-18 07:52:34 -07001851 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001852}
1853
1854static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001855 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001856 0, /* sq_concat */
1857 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001858 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001859 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001860 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001861 0,
1862};
1863
1864static PyMappingMethods element_as_mapping = {
1865 (lenfunc) element_length,
1866 (binaryfunc) element_subscr,
1867 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001868};
1869
Neal Norwitz227b5332006-03-22 09:28:35 +00001870static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001871 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001872 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001873 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001874 (destructor)element_dealloc, /* tp_dealloc */
1875 0, /* tp_print */
1876 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001877 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001878 0, /* tp_reserved */
1879 (reprfunc)element_repr, /* tp_repr */
1880 0, /* tp_as_number */
1881 &element_as_sequence, /* tp_as_sequence */
1882 &element_as_mapping, /* tp_as_mapping */
1883 0, /* tp_hash */
1884 0, /* tp_call */
1885 0, /* tp_str */
1886 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001887 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001888 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001889 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1890 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001891 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001892 (traverseproc)element_gc_traverse, /* tp_traverse */
1893 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001894 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001895 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001896 0, /* tp_iter */
1897 0, /* tp_iternext */
1898 element_methods, /* tp_methods */
1899 0, /* tp_members */
1900 0, /* tp_getset */
1901 0, /* tp_base */
1902 0, /* tp_dict */
1903 0, /* tp_descr_get */
1904 0, /* tp_descr_set */
1905 0, /* tp_dictoffset */
1906 (initproc)element_init, /* tp_init */
1907 PyType_GenericAlloc, /* tp_alloc */
1908 element_new, /* tp_new */
1909 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001910};
1911
/******************************* Element iterator ****************************/
1913
1914/* ElementIterObject represents the iteration state over an XML element in
1915 * pre-order traversal. To keep track of which sub-element should be returned
1916 * next, a stack of parents is maintained. This is a standard stack-based
1917 * iterative pre-order traversal of a tree.
1918 * The stack is managed using a single-linked list starting at parent_stack.
1919 * Each stack node contains the saved parent to which we should return after
1920 * the current one is exhausted, and the next child to examine in that parent.
1921 */
1922typedef struct ParentLocator_t {
1923 ElementObject *parent;
1924 Py_ssize_t child_index;
1925 struct ParentLocator_t *next;
1926} ParentLocator;
1927
1928typedef struct {
1929 PyObject_HEAD
1930 ParentLocator *parent_stack;
1931 ElementObject *root_element;
1932 PyObject *sought_tag;
1933 int root_done;
1934 int gettext;
1935} ElementIterObject;
1936
1937
1938static void
1939elementiter_dealloc(ElementIterObject *it)
1940{
1941 ParentLocator *p = it->parent_stack;
1942 while (p) {
1943 ParentLocator *temp = p;
1944 Py_XDECREF(p->parent);
1945 p = p->next;
1946 PyObject_Free(temp);
1947 }
1948
1949 Py_XDECREF(it->sought_tag);
1950 Py_XDECREF(it->root_element);
1951
1952 PyObject_GC_UnTrack(it);
1953 PyObject_GC_Del(it);
1954}
1955
1956static int
1957elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1958{
1959 ParentLocator *p = it->parent_stack;
1960 while (p) {
1961 Py_VISIT(p->parent);
1962 p = p->next;
1963 }
1964
1965 Py_VISIT(it->root_element);
1966 Py_VISIT(it->sought_tag);
1967 return 0;
1968}
1969
1970/* Helper function for elementiter_next. Add a new parent to the parent stack.
1971 */
1972static ParentLocator *
1973parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1974{
1975 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1976 if (new_node) {
1977 new_node->parent = parent;
1978 Py_INCREF(parent);
1979 new_node->child_index = 0;
1980 new_node->next = stack;
1981 }
1982 return new_node;
1983}
1984
1985static PyObject *
1986elementiter_next(ElementIterObject *it)
1987{
1988 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08001989 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03001990 * A short note on gettext: this function serves both the iter() and
1991 * itertext() methods to avoid code duplication. However, there are a few
1992 * small differences in the way these iterations work. Namely:
1993 * - itertext() only yields text from nodes that have it, and continues
1994 * iterating when a node doesn't have text (so it doesn't return any
1995 * node like iter())
1996 * - itertext() also has to handle tail, after finishing with all the
1997 * children of a node.
1998 */
Eli Bendersky113da642012-06-15 07:52:49 +03001999 ElementObject *cur_parent;
2000 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002001
2002 while (1) {
2003 /* Handle the case reached in the beginning and end of iteration, where
2004 * the parent stack is empty. The root_done flag gives us indication
2005 * whether we've just started iterating (so root_done is 0), in which
2006 * case the root is returned. If root_done is 1 and we're here, the
2007 * iterator is exhausted.
2008 */
2009 if (!it->parent_stack->parent) {
2010 if (it->root_done) {
2011 PyErr_SetNone(PyExc_StopIteration);
2012 return NULL;
2013 } else {
2014 it->parent_stack = parent_stack_push_new(it->parent_stack,
2015 it->root_element);
2016 if (!it->parent_stack) {
2017 PyErr_NoMemory();
2018 return NULL;
2019 }
2020
2021 it->root_done = 1;
2022 if (it->sought_tag == Py_None ||
2023 PyObject_RichCompareBool(it->root_element->tag,
2024 it->sought_tag, Py_EQ) == 1) {
2025 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002026 PyObject *text = element_get_text(it->root_element);
2027 if (!text)
2028 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002029 if (PyObject_IsTrue(text)) {
2030 Py_INCREF(text);
2031 return text;
2032 }
2033 } else {
2034 Py_INCREF(it->root_element);
2035 return (PyObject *)it->root_element;
2036 }
2037 }
2038 }
2039 }
2040
2041 /* See if there are children left to traverse in the current parent. If
2042 * yes, visit the next child. If not, pop the stack and try again.
2043 */
Eli Bendersky113da642012-06-15 07:52:49 +03002044 cur_parent = it->parent_stack->parent;
2045 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002046 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2047 ElementObject *child = (ElementObject *)
2048 cur_parent->extra->children[child_index];
2049 it->parent_stack->child_index++;
2050 it->parent_stack = parent_stack_push_new(it->parent_stack,
2051 child);
2052 if (!it->parent_stack) {
2053 PyErr_NoMemory();
2054 return NULL;
2055 }
2056
2057 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002058 PyObject *text = element_get_text(child);
2059 if (!text)
2060 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002061 if (PyObject_IsTrue(text)) {
2062 Py_INCREF(text);
2063 return text;
2064 }
2065 } else if (it->sought_tag == Py_None ||
2066 PyObject_RichCompareBool(child->tag,
2067 it->sought_tag, Py_EQ) == 1) {
2068 Py_INCREF(child);
2069 return (PyObject *)child;
2070 }
2071 else
2072 continue;
2073 }
2074 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002075 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002076 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002077 if (it->gettext) {
2078 tail = element_get_tail(cur_parent);
2079 if (!tail)
2080 return NULL;
2081 }
2082 else
2083 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002084 Py_XDECREF(it->parent_stack->parent);
2085 PyObject_Free(it->parent_stack);
2086 it->parent_stack = next;
2087
2088 /* Note that extra condition on it->parent_stack->parent here;
2089 * this is because itertext() is supposed to only return *inner*
2090 * text, not text following the element it began iteration with.
2091 */
2092 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2093 Py_INCREF(tail);
2094 return tail;
2095 }
2096 }
2097 }
2098
2099 return NULL;
2100}
2101
2102
2103static PyTypeObject ElementIter_Type = {
2104 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002105 /* Using the module's name since the pure-Python implementation does not
2106 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002107 "_elementtree._element_iterator", /* tp_name */
2108 sizeof(ElementIterObject), /* tp_basicsize */
2109 0, /* tp_itemsize */
2110 /* methods */
2111 (destructor)elementiter_dealloc, /* tp_dealloc */
2112 0, /* tp_print */
2113 0, /* tp_getattr */
2114 0, /* tp_setattr */
2115 0, /* tp_reserved */
2116 0, /* tp_repr */
2117 0, /* tp_as_number */
2118 0, /* tp_as_sequence */
2119 0, /* tp_as_mapping */
2120 0, /* tp_hash */
2121 0, /* tp_call */
2122 0, /* tp_str */
2123 0, /* tp_getattro */
2124 0, /* tp_setattro */
2125 0, /* tp_as_buffer */
2126 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2127 0, /* tp_doc */
2128 (traverseproc)elementiter_traverse, /* tp_traverse */
2129 0, /* tp_clear */
2130 0, /* tp_richcompare */
2131 0, /* tp_weaklistoffset */
2132 PyObject_SelfIter, /* tp_iter */
2133 (iternextfunc)elementiter_next, /* tp_iternext */
2134 0, /* tp_methods */
2135 0, /* tp_members */
2136 0, /* tp_getset */
2137 0, /* tp_base */
2138 0, /* tp_dict */
2139 0, /* tp_descr_get */
2140 0, /* tp_descr_set */
2141 0, /* tp_dictoffset */
2142 0, /* tp_init */
2143 0, /* tp_alloc */
2144 0, /* tp_new */
2145};
2146
2147
2148static PyObject *
2149create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2150{
2151 ElementIterObject *it;
2152 PyObject *star = NULL;
2153
2154 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2155 if (!it)
2156 return NULL;
2157 if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
2158 PyObject_GC_Del(it);
2159 return NULL;
2160 }
2161
2162 it->parent_stack->parent = NULL;
2163 it->parent_stack->child_index = 0;
2164 it->parent_stack->next = NULL;
2165
2166 if (PyUnicode_Check(tag))
2167 star = PyUnicode_FromString("*");
2168 else if (PyBytes_Check(tag))
2169 star = PyBytes_FromString("*");
2170
2171 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2172 tag = Py_None;
2173
2174 Py_XDECREF(star);
2175 it->sought_tag = tag;
2176 it->root_done = 0;
2177 it->gettext = gettext;
2178 it->root_element = self;
2179
2180 Py_INCREF(self);
2181 Py_INCREF(tag);
2182
2183 PyObject_GC_Track(it);
2184 return (PyObject *)it;
2185}
2186
2187
/* ==================================================================== */
/* the tree builder type */
2190
2191typedef struct {
2192 PyObject_HEAD
2193
Eli Bendersky58d548d2012-05-29 15:45:16 +03002194 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002195
Antoine Pitrouee329312012-10-04 19:53:29 +02002196 PyObject *this; /* current node */
2197 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002198
Eli Bendersky58d548d2012-05-29 15:45:16 +03002199 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002200
Eli Bendersky58d548d2012-05-29 15:45:16 +03002201 PyObject *stack; /* element stack */
2202 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002203
Eli Bendersky48d358b2012-05-30 17:57:50 +03002204 PyObject *element_factory;
2205
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002206 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002207 PyObject *events; /* list of events, or NULL if not collecting */
2208 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2209 PyObject *end_event_obj;
2210 PyObject *start_ns_event_obj;
2211 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002212} TreeBuilderObject;
2213
Christian Heimes90aa7642007-12-19 02:45:37 +00002214#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002215
/* -------------------------------------------------------------------- */
/* constructor and destructor */
2218
Eli Bendersky58d548d2012-05-29 15:45:16 +03002219static PyObject *
2220treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002221{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002222 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2223 if (t != NULL) {
2224 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002225
Eli Bendersky58d548d2012-05-29 15:45:16 +03002226 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002227 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002228 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002229 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002230
Eli Bendersky58d548d2012-05-29 15:45:16 +03002231 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002232 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002233 t->stack = PyList_New(20);
2234 if (!t->stack) {
2235 Py_DECREF(t->this);
2236 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002237 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002238 return NULL;
2239 }
2240 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002241
Eli Bendersky58d548d2012-05-29 15:45:16 +03002242 t->events = NULL;
2243 t->start_event_obj = t->end_event_obj = NULL;
2244 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2245 }
2246 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002247}
2248
Eli Bendersky58d548d2012-05-29 15:45:16 +03002249static int
2250treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002251{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002252 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002253 PyObject *element_factory = NULL;
2254 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002255 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002256
2257 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2258 &element_factory)) {
2259 return -1;
2260 }
2261
2262 if (element_factory) {
2263 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002264 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002265 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002266 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002267 }
2268
Eli Bendersky58d548d2012-05-29 15:45:16 +03002269 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002270}
2271
Eli Bendersky48d358b2012-05-30 17:57:50 +03002272static int
2273treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2274{
2275 Py_VISIT(self->root);
2276 Py_VISIT(self->this);
2277 Py_VISIT(self->last);
2278 Py_VISIT(self->data);
2279 Py_VISIT(self->stack);
2280 Py_VISIT(self->element_factory);
2281 return 0;
2282}
2283
2284static int
2285treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002286{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002287 Py_CLEAR(self->end_ns_event_obj);
2288 Py_CLEAR(self->start_ns_event_obj);
2289 Py_CLEAR(self->end_event_obj);
2290 Py_CLEAR(self->start_event_obj);
2291 Py_CLEAR(self->events);
2292 Py_CLEAR(self->stack);
2293 Py_CLEAR(self->data);
2294 Py_CLEAR(self->last);
2295 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002296 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002297 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002298 return 0;
2299}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002300
Eli Bendersky48d358b2012-05-30 17:57:50 +03002301static void
2302treebuilder_dealloc(TreeBuilderObject *self)
2303{
2304 PyObject_GC_UnTrack(self);
2305 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002306 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002307}
2308
/* -------------------------------------------------------------------- */
/* helpers for handling of arbitrary element-like objects */
2311
2312static int
2313treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2314 PyObject **dest, _Py_Identifier *name)
2315{
2316 if (Element_CheckExact(element)) {
2317 Py_DECREF(JOIN_OBJ(*dest));
2318 *dest = JOIN_SET(data, PyList_CheckExact(data));
2319 return 0;
2320 }
2321 else {
2322 PyObject *joined = list_join(data);
2323 int r;
2324 if (joined == NULL)
2325 return -1;
2326 r = _PyObject_SetAttrId(element, name, joined);
2327 Py_DECREF(joined);
2328 return r;
2329 }
2330}
2331
2332/* These two functions steal a reference to data */
2333static int
2334treebuilder_set_element_text(PyObject *element, PyObject *data)
2335{
2336 _Py_IDENTIFIER(text);
2337 return treebuilder_set_element_text_or_tail(
2338 element, data, &((ElementObject *) element)->text, &PyId_text);
2339}
2340
2341static int
2342treebuilder_set_element_tail(PyObject *element, PyObject *data)
2343{
2344 _Py_IDENTIFIER(tail);
2345 return treebuilder_set_element_text_or_tail(
2346 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2347}
2348
2349static int
2350treebuilder_add_subelement(PyObject *element, PyObject *child)
2351{
2352 _Py_IDENTIFIER(append);
2353 if (Element_CheckExact(element)) {
2354 ElementObject *elem = (ElementObject *) element;
2355 return element_add_subelement(elem, child);
2356 }
2357 else {
2358 PyObject *res;
2359 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2360 if (res == NULL)
2361 return -1;
2362 Py_DECREF(res);
2363 return 0;
2364 }
2365}
2366
/* -------------------------------------------------------------------- */
/* handlers */
2369
2370LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002371treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2372 PyObject* attrib)
2373{
2374 PyObject* node;
2375 PyObject* this;
2376
2377 if (self->data) {
2378 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002379 if (treebuilder_set_element_text(self->last, self->data))
2380 return NULL;
2381 }
2382 else {
2383 if (treebuilder_set_element_tail(self->last, self->data))
2384 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002385 }
2386 self->data = NULL;
2387 }
2388
Eli Bendersky08231a92013-05-18 15:47:16 -07002389 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002390 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2391 } else {
2392 node = create_new_element(tag, attrib);
2393 }
2394 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002395 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002396 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002397
Antoine Pitrouee329312012-10-04 19:53:29 +02002398 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002399
2400 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002401 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002402 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002403 } else {
2404 if (self->root) {
2405 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002406 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002407 "multiple elements on top level"
2408 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002409 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002410 }
2411 Py_INCREF(node);
2412 self->root = node;
2413 }
2414
2415 if (self->index < PyList_GET_SIZE(self->stack)) {
2416 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002417 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002418 Py_INCREF(this);
2419 } else {
2420 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002421 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002422 }
2423 self->index++;
2424
2425 Py_DECREF(this);
2426 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002427 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002428
2429 Py_DECREF(self->last);
2430 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002431 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002432
2433 if (self->start_event_obj) {
2434 PyObject* res;
2435 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002436 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002437 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002438 PyList_Append(self->events, res);
2439 Py_DECREF(res);
2440 } else
2441 PyErr_Clear(); /* FIXME: propagate error */
2442 }
2443
2444 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002445
2446 error:
2447 Py_DECREF(node);
2448 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002449}
2450
2451LOCAL(PyObject*)
2452treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2453{
2454 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002455 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002456 /* ignore calls to data before the first call to start */
2457 Py_RETURN_NONE;
2458 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002459 /* store the first item as is */
2460 Py_INCREF(data); self->data = data;
2461 } else {
2462 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002463 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2464 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002465 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002466 /* expat often generates single character data sections; handle
2467 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002468 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2469 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002470 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002471 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002472 } else if (PyList_CheckExact(self->data)) {
2473 if (PyList_Append(self->data, data) < 0)
2474 return NULL;
2475 } else {
2476 PyObject* list = PyList_New(2);
2477 if (!list)
2478 return NULL;
2479 PyList_SET_ITEM(list, 0, self->data);
2480 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2481 self->data = list;
2482 }
2483 }
2484
2485 Py_RETURN_NONE;
2486}
2487
2488LOCAL(PyObject*)
2489treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2490{
2491 PyObject* item;
2492
2493 if (self->data) {
2494 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002495 if (treebuilder_set_element_text(self->last, self->data))
2496 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002497 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002498 if (treebuilder_set_element_tail(self->last, self->data))
2499 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002500 }
2501 self->data = NULL;
2502 }
2503
2504 if (self->index == 0) {
2505 PyErr_SetString(
2506 PyExc_IndexError,
2507 "pop from empty stack"
2508 );
2509 return NULL;
2510 }
2511
2512 self->index--;
2513
2514 item = PyList_GET_ITEM(self->stack, self->index);
2515 Py_INCREF(item);
2516
2517 Py_DECREF(self->last);
2518
Antoine Pitrouee329312012-10-04 19:53:29 +02002519 self->last = self->this;
2520 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521
2522 if (self->end_event_obj) {
2523 PyObject* res;
2524 PyObject* action = self->end_event_obj;
2525 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002526 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002527 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002528 PyList_Append(self->events, res);
2529 Py_DECREF(res);
2530 } else
2531 PyErr_Clear(); /* FIXME: propagate error */
2532 }
2533
2534 Py_INCREF(self->last);
2535 return (PyObject*) self->last;
2536}
2537
2538LOCAL(void)
2539treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002540 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002541{
2542 PyObject* res;
2543 PyObject* action;
2544 PyObject* parcel;
2545
2546 if (!self->events)
2547 return;
2548
2549 if (start) {
2550 if (!self->start_ns_event_obj)
2551 return;
2552 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002553 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002554 if (!parcel)
2555 return;
2556 Py_INCREF(action);
2557 } else {
2558 if (!self->end_ns_event_obj)
2559 return;
2560 action = self->end_ns_event_obj;
2561 Py_INCREF(action);
2562 parcel = Py_None;
2563 Py_INCREF(parcel);
2564 }
2565
2566 res = PyTuple_New(2);
2567
2568 if (res) {
2569 PyTuple_SET_ITEM(res, 0, action);
2570 PyTuple_SET_ITEM(res, 1, parcel);
2571 PyList_Append(self->events, res);
2572 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002573 }
2574 else {
2575 Py_DECREF(action);
2576 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002577 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002578 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002579}
2580
/* -------------------------------------------------------------------- */
/* methods (in alphabetical order) */
2583
2584static PyObject*
2585treebuilder_data(TreeBuilderObject* self, PyObject* args)
2586{
2587 PyObject* data;
2588 if (!PyArg_ParseTuple(args, "O:data", &data))
2589 return NULL;
2590
2591 return treebuilder_handle_data(self, data);
2592}
2593
2594static PyObject*
2595treebuilder_end(TreeBuilderObject* self, PyObject* args)
2596{
2597 PyObject* tag;
2598 if (!PyArg_ParseTuple(args, "O:end", &tag))
2599 return NULL;
2600
2601 return treebuilder_handle_end(self, tag);
2602}
2603
2604LOCAL(PyObject*)
2605treebuilder_done(TreeBuilderObject* self)
2606{
2607 PyObject* res;
2608
2609 /* FIXME: check stack size? */
2610
2611 if (self->root)
2612 res = self->root;
2613 else
2614 res = Py_None;
2615
2616 Py_INCREF(res);
2617 return res;
2618}
2619
2620static PyObject*
2621treebuilder_close(TreeBuilderObject* self, PyObject* args)
2622{
2623 if (!PyArg_ParseTuple(args, ":close"))
2624 return NULL;
2625
2626 return treebuilder_done(self);
2627}
2628
2629static PyObject*
2630treebuilder_start(TreeBuilderObject* self, PyObject* args)
2631{
2632 PyObject* tag;
2633 PyObject* attrib = Py_None;
2634 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2635 return NULL;
2636
2637 return treebuilder_handle_start(self, tag, attrib);
2638}
2639
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002640static PyMethodDef treebuilder_methods[] = {
2641 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2642 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2643 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002644 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2645 {NULL, NULL}
2646};
2647
Neal Norwitz227b5332006-03-22 09:28:35 +00002648static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002649 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002650 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002651 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002652 (destructor)treebuilder_dealloc, /* tp_dealloc */
2653 0, /* tp_print */
2654 0, /* tp_getattr */
2655 0, /* tp_setattr */
2656 0, /* tp_reserved */
2657 0, /* tp_repr */
2658 0, /* tp_as_number */
2659 0, /* tp_as_sequence */
2660 0, /* tp_as_mapping */
2661 0, /* tp_hash */
2662 0, /* tp_call */
2663 0, /* tp_str */
2664 0, /* tp_getattro */
2665 0, /* tp_setattro */
2666 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002667 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2668 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002669 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002670 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2671 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002672 0, /* tp_richcompare */
2673 0, /* tp_weaklistoffset */
2674 0, /* tp_iter */
2675 0, /* tp_iternext */
2676 treebuilder_methods, /* tp_methods */
2677 0, /* tp_members */
2678 0, /* tp_getset */
2679 0, /* tp_base */
2680 0, /* tp_dict */
2681 0, /* tp_descr_get */
2682 0, /* tp_descr_set */
2683 0, /* tp_dictoffset */
2684 (initproc)treebuilder_init, /* tp_init */
2685 PyType_GenericAlloc, /* tp_alloc */
2686 treebuilder_new, /* tp_new */
2687 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002688};
2689
2690/* ==================================================================== */
2691/* the expat interface */
2692
2693#if defined(USE_EXPAT)
2694
2695#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002696#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002697static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002698#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002699
Eli Bendersky52467b12012-06-01 07:13:08 +03002700static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2701 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2702
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002703typedef struct {
2704 PyObject_HEAD
2705
2706 XML_Parser parser;
2707
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002708 PyObject *target;
2709 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002711 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002712
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002713 PyObject *handle_start;
2714 PyObject *handle_data;
2715 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002717 PyObject *handle_comment;
2718 PyObject *handle_pi;
2719 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002720
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002721 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002722
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002723} XMLParserObject;
2724
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002725#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2726
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002727/* helpers */
2728
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002729LOCAL(PyObject*)
2730makeuniversal(XMLParserObject* self, const char* string)
2731{
2732 /* convert a UTF-8 tag/attribute name from the expat parser
2733 to a universal name string */
2734
Antoine Pitrouc1948842012-10-01 23:40:37 +02002735 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736 PyObject* key;
2737 PyObject* value;
2738
2739 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002740 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002741 if (!key)
2742 return NULL;
2743
2744 value = PyDict_GetItem(self->names, key);
2745
2746 if (value) {
2747 Py_INCREF(value);
2748 } else {
2749 /* new name. convert to universal name, and decode as
2750 necessary */
2751
2752 PyObject* tag;
2753 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002754 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002755
2756 /* look for namespace separator */
2757 for (i = 0; i < size; i++)
2758 if (string[i] == '}')
2759 break;
2760 if (i != size) {
2761 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002762 tag = PyBytes_FromStringAndSize(NULL, size+1);
2763 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002764 p[0] = '{';
2765 memcpy(p+1, string, size);
2766 size++;
2767 } else {
2768 /* plain name; use key as tag */
2769 Py_INCREF(key);
2770 tag = key;
2771 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002772
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002773 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002774 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002775 value = PyUnicode_DecodeUTF8(p, size, "strict");
2776 Py_DECREF(tag);
2777 if (!value) {
2778 Py_DECREF(key);
2779 return NULL;
2780 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002781
2782 /* add to names dictionary */
2783 if (PyDict_SetItem(self->names, key, value) < 0) {
2784 Py_DECREF(key);
2785 Py_DECREF(value);
2786 return NULL;
2787 }
2788 }
2789
2790 Py_DECREF(key);
2791 return value;
2792}
2793
Eli Bendersky5b77d812012-03-16 08:20:05 +02002794/* Set the ParseError exception with the given parameters.
2795 * If message is not NULL, it's used as the error string. Otherwise, the
2796 * message string is the default for the given error_code.
2797*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002798static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002799expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002800{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002801 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002802
Victor Stinner499dfcf2011-03-21 13:26:24 +01002803 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002804 message ? message : EXPAT(ErrorString)(error_code),
2805 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002806 if (errmsg == NULL)
2807 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002808
Victor Stinner499dfcf2011-03-21 13:26:24 +01002809 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2810 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002811 if (!error)
2812 return;
2813
Eli Bendersky5b77d812012-03-16 08:20:05 +02002814 /* Add code and position attributes */
2815 code = PyLong_FromLong((long)error_code);
2816 if (!code) {
2817 Py_DECREF(error);
2818 return;
2819 }
2820 if (PyObject_SetAttrString(error, "code", code) == -1) {
2821 Py_DECREF(error);
2822 Py_DECREF(code);
2823 return;
2824 }
2825 Py_DECREF(code);
2826
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002827 position = Py_BuildValue("(ii)", line, column);
2828 if (!position) {
2829 Py_DECREF(error);
2830 return;
2831 }
2832 if (PyObject_SetAttrString(error, "position", position) == -1) {
2833 Py_DECREF(error);
2834 Py_DECREF(position);
2835 return;
2836 }
2837 Py_DECREF(position);
2838
2839 PyErr_SetObject(elementtree_parseerror_obj, error);
2840 Py_DECREF(error);
2841}
2842
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002843/* -------------------------------------------------------------------- */
2844/* handlers */
2845
2846static void
2847expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2848 int data_len)
2849{
2850 PyObject* key;
2851 PyObject* value;
2852 PyObject* res;
2853
2854 if (data_len < 2 || data_in[0] != '&')
2855 return;
2856
Neal Norwitz0269b912007-08-08 06:56:02 +00002857 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002858 if (!key)
2859 return;
2860
2861 value = PyDict_GetItem(self->entity, key);
2862
2863 if (value) {
2864 if (TreeBuilder_CheckExact(self->target))
2865 res = treebuilder_handle_data(
2866 (TreeBuilderObject*) self->target, value
2867 );
2868 else if (self->handle_data)
2869 res = PyObject_CallFunction(self->handle_data, "O", value);
2870 else
2871 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002872 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002873 } else if (!PyErr_Occurred()) {
2874 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002875 char message[128] = "undefined entity ";
2876 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002877 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002878 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002879 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002880 EXPAT(GetErrorColumnNumber)(self->parser),
2881 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002882 );
2883 }
2884
2885 Py_DECREF(key);
2886}
2887
2888static void
2889expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2890 const XML_Char **attrib_in)
2891{
2892 PyObject* res;
2893 PyObject* tag;
2894 PyObject* attrib;
2895 int ok;
2896
2897 /* tag name */
2898 tag = makeuniversal(self, tag_in);
2899 if (!tag)
2900 return; /* parser will look for errors */
2901
2902 /* attributes */
2903 if (attrib_in[0]) {
2904 attrib = PyDict_New();
2905 if (!attrib)
2906 return;
2907 while (attrib_in[0] && attrib_in[1]) {
2908 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002909 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002910 if (!key || !value) {
2911 Py_XDECREF(value);
2912 Py_XDECREF(key);
2913 Py_DECREF(attrib);
2914 return;
2915 }
2916 ok = PyDict_SetItem(attrib, key, value);
2917 Py_DECREF(value);
2918 Py_DECREF(key);
2919 if (ok < 0) {
2920 Py_DECREF(attrib);
2921 return;
2922 }
2923 attrib_in += 2;
2924 }
2925 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002926 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002927 attrib = PyDict_New();
2928 if (!attrib)
2929 return;
2930 }
2931
2932 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002933 /* shortcut */
2934 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2935 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002936 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002937 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002938 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002939 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002940 res = NULL;
2941
2942 Py_DECREF(tag);
2943 Py_DECREF(attrib);
2944
2945 Py_XDECREF(res);
2946}
2947
2948static void
2949expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2950 int data_len)
2951{
2952 PyObject* data;
2953 PyObject* res;
2954
Neal Norwitz0269b912007-08-08 06:56:02 +00002955 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002956 if (!data)
2957 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002958
2959 if (TreeBuilder_CheckExact(self->target))
2960 /* shortcut */
2961 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2962 else if (self->handle_data)
2963 res = PyObject_CallFunction(self->handle_data, "O", data);
2964 else
2965 res = NULL;
2966
2967 Py_DECREF(data);
2968
2969 Py_XDECREF(res);
2970}
2971
2972static void
2973expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2974{
2975 PyObject* tag;
2976 PyObject* res = NULL;
2977
2978 if (TreeBuilder_CheckExact(self->target))
2979 /* shortcut */
2980 /* the standard tree builder doesn't look at the end tag */
2981 res = treebuilder_handle_end(
2982 (TreeBuilderObject*) self->target, Py_None
2983 );
2984 else if (self->handle_end) {
2985 tag = makeuniversal(self, tag_in);
2986 if (tag) {
2987 res = PyObject_CallFunction(self->handle_end, "O", tag);
2988 Py_DECREF(tag);
2989 }
2990 }
2991
2992 Py_XDECREF(res);
2993}
2994
2995static void
2996expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2997 const XML_Char *uri)
2998{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002999 PyObject* sprefix = NULL;
3000 PyObject* suri = NULL;
3001
3002 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
3003 if (!suri)
3004 return;
3005
3006 if (prefix)
3007 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3008 else
3009 sprefix = PyUnicode_FromString("");
3010 if (!sprefix) {
3011 Py_DECREF(suri);
3012 return;
3013 }
3014
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003015 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003016 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003017 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003018
3019 Py_DECREF(sprefix);
3020 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003021}
3022
3023static void
3024expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3025{
3026 treebuilder_handle_namespace(
3027 (TreeBuilderObject*) self->target, 0, NULL, NULL
3028 );
3029}
3030
3031static void
3032expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3033{
3034 PyObject* comment;
3035 PyObject* res;
3036
3037 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003038 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039 if (comment) {
3040 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3041 Py_XDECREF(res);
3042 Py_DECREF(comment);
3043 }
3044 }
3045}
3046
Eli Bendersky45839902013-01-13 05:14:47 -08003047static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003048expat_start_doctype_handler(XMLParserObject *self,
3049 const XML_Char *doctype_name,
3050 const XML_Char *sysid,
3051 const XML_Char *pubid,
3052 int has_internal_subset)
3053{
3054 PyObject *self_pyobj = (PyObject *)self;
3055 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3056 PyObject *parser_doctype = NULL;
3057 PyObject *res = NULL;
3058
3059 doctype_name_obj = makeuniversal(self, doctype_name);
3060 if (!doctype_name_obj)
3061 return;
3062
3063 if (sysid) {
3064 sysid_obj = makeuniversal(self, sysid);
3065 if (!sysid_obj) {
3066 Py_DECREF(doctype_name_obj);
3067 return;
3068 }
3069 } else {
3070 Py_INCREF(Py_None);
3071 sysid_obj = Py_None;
3072 }
3073
3074 if (pubid) {
3075 pubid_obj = makeuniversal(self, pubid);
3076 if (!pubid_obj) {
3077 Py_DECREF(doctype_name_obj);
3078 Py_DECREF(sysid_obj);
3079 return;
3080 }
3081 } else {
3082 Py_INCREF(Py_None);
3083 pubid_obj = Py_None;
3084 }
3085
3086 /* If the target has a handler for doctype, call it. */
3087 if (self->handle_doctype) {
3088 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3089 doctype_name_obj, pubid_obj, sysid_obj);
3090 Py_CLEAR(res);
3091 }
3092
3093 /* Now see if the parser itself has a doctype method. If yes and it's
3094 * a subclass, call it but warn about deprecation. If it's not a subclass
3095 * (i.e. vanilla XMLParser), do nothing.
3096 */
3097 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3098 if (parser_doctype) {
3099 if (!XMLParser_CheckExact(self_pyobj)) {
3100 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3101 "This method of XMLParser is deprecated. Define"
3102 " doctype() method on the TreeBuilder target.",
3103 1) < 0) {
3104 goto clear;
3105 }
3106 res = PyObject_CallFunction(parser_doctype, "OOO",
3107 doctype_name_obj, pubid_obj, sysid_obj);
3108 Py_CLEAR(res);
3109 }
3110 }
3111
3112clear:
3113 Py_XDECREF(parser_doctype);
3114 Py_DECREF(doctype_name_obj);
3115 Py_DECREF(pubid_obj);
3116 Py_DECREF(sysid_obj);
3117}
3118
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003119static void
3120expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3121 const XML_Char* data_in)
3122{
3123 PyObject* target;
3124 PyObject* data;
3125 PyObject* res;
3126
3127 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003128 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3129 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003130 if (target && data) {
3131 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3132 Py_XDECREF(res);
3133 Py_DECREF(data);
3134 Py_DECREF(target);
3135 } else {
3136 Py_XDECREF(data);
3137 Py_XDECREF(target);
3138 }
3139 }
3140}
3141
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003142/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003143
Eli Bendersky52467b12012-06-01 07:13:08 +03003144static PyObject *
3145xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003146{
Eli Bendersky52467b12012-06-01 07:13:08 +03003147 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3148 if (self) {
3149 self->parser = NULL;
3150 self->target = self->entity = self->names = NULL;
3151 self->handle_start = self->handle_data = self->handle_end = NULL;
3152 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003153 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003154 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003155 return (PyObject *)self;
3156}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003157
Eli Bendersky52467b12012-06-01 07:13:08 +03003158static int
3159xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3160{
3161 XMLParserObject *self_xp = (XMLParserObject *)self;
3162 PyObject *target = NULL, *html = NULL;
3163 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003164 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003165
Eli Bendersky52467b12012-06-01 07:13:08 +03003166 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3167 &html, &target, &encoding)) {
3168 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003169 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003170
Eli Bendersky52467b12012-06-01 07:13:08 +03003171 self_xp->entity = PyDict_New();
3172 if (!self_xp->entity)
3173 return -1;
3174
3175 self_xp->names = PyDict_New();
3176 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003177 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003178 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003179 }
3180
Eli Bendersky52467b12012-06-01 07:13:08 +03003181 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3182 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003183 Py_CLEAR(self_xp->entity);
3184 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003185 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003186 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003187 }
3188
Eli Bendersky52467b12012-06-01 07:13:08 +03003189 if (target) {
3190 Py_INCREF(target);
3191 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003192 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003193 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003194 Py_CLEAR(self_xp->entity);
3195 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003196 EXPAT(ParserFree)(self_xp->parser);
3197 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003198 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003199 }
3200 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003201
Eli Bendersky52467b12012-06-01 07:13:08 +03003202 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3203 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3204 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3205 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3206 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3207 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003208 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003209
3210 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003211
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003212 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003213 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003214 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003215 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003216 (XML_StartElementHandler) expat_start_handler,
3217 (XML_EndElementHandler) expat_end_handler
3218 );
3219 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003220 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003221 (XML_DefaultHandler) expat_default_handler
3222 );
3223 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003224 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003225 (XML_CharacterDataHandler) expat_data_handler
3226 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003227 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003228 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003229 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003230 (XML_CommentHandler) expat_comment_handler
3231 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003232 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003233 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003234 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003235 (XML_ProcessingInstructionHandler) expat_pi_handler
3236 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003237 EXPAT(SetStartDoctypeDeclHandler)(
3238 self_xp->parser,
3239 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3240 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003241 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003242 self_xp->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003243 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003244 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003245
Eli Bendersky52467b12012-06-01 07:13:08 +03003246 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003247}
3248
Eli Bendersky52467b12012-06-01 07:13:08 +03003249static int
3250xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3251{
3252 Py_VISIT(self->handle_close);
3253 Py_VISIT(self->handle_pi);
3254 Py_VISIT(self->handle_comment);
3255 Py_VISIT(self->handle_end);
3256 Py_VISIT(self->handle_data);
3257 Py_VISIT(self->handle_start);
3258
3259 Py_VISIT(self->target);
3260 Py_VISIT(self->entity);
3261 Py_VISIT(self->names);
3262
3263 return 0;
3264}
3265
3266static int
3267xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003268{
3269 EXPAT(ParserFree)(self->parser);
3270
Antoine Pitrouc1948842012-10-01 23:40:37 +02003271 Py_CLEAR(self->handle_close);
3272 Py_CLEAR(self->handle_pi);
3273 Py_CLEAR(self->handle_comment);
3274 Py_CLEAR(self->handle_end);
3275 Py_CLEAR(self->handle_data);
3276 Py_CLEAR(self->handle_start);
3277 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003278
Antoine Pitrouc1948842012-10-01 23:40:37 +02003279 Py_CLEAR(self->target);
3280 Py_CLEAR(self->entity);
3281 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282
Eli Bendersky52467b12012-06-01 07:13:08 +03003283 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284}
3285
Eli Bendersky52467b12012-06-01 07:13:08 +03003286static void
3287xmlparser_dealloc(XMLParserObject* self)
3288{
3289 PyObject_GC_UnTrack(self);
3290 xmlparser_gc_clear(self);
3291 Py_TYPE(self)->tp_free((PyObject *)self);
3292}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003293
3294LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003295expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003296{
3297 int ok;
3298
3299 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3300
3301 if (PyErr_Occurred())
3302 return NULL;
3303
3304 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003305 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003306 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003307 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003308 EXPAT(GetErrorColumnNumber)(self->parser),
3309 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003310 );
3311 return NULL;
3312 }
3313
3314 Py_RETURN_NONE;
3315}
3316
3317static PyObject*
3318xmlparser_close(XMLParserObject* self, PyObject* args)
3319{
3320 /* end feeding data to parser */
3321
3322 PyObject* res;
3323 if (!PyArg_ParseTuple(args, ":close"))
3324 return NULL;
3325
3326 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003327 if (!res)
3328 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003329
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003330 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003331 Py_DECREF(res);
3332 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003333 } if (self->handle_close) {
3334 Py_DECREF(res);
3335 return PyObject_CallFunction(self->handle_close, "");
3336 } else
3337 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003338}
3339
3340static PyObject*
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003341xmlparser_feed(XMLParserObject* self, PyObject* arg)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003342{
3343 /* feed data to parser */
3344
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003345 if (PyUnicode_Check(arg)) {
3346 Py_ssize_t data_len;
3347 const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len);
3348 if (data == NULL)
3349 return NULL;
3350 if (data_len > INT_MAX) {
3351 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3352 return NULL;
3353 }
3354 /* Explicitly set UTF-8 encoding. Return code ignored. */
3355 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3356 return expat_parse(self, data, (int)data_len, 0);
3357 }
3358 else {
3359 Py_buffer view;
3360 PyObject *res;
3361 if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0)
3362 return NULL;
3363 if (view.len > INT_MAX) {
3364 PyBuffer_Release(&view);
3365 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3366 return NULL;
3367 }
3368 res = expat_parse(self, view.buf, (int)view.len, 0);
3369 PyBuffer_Release(&view);
3370 return res;
3371 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003372}
3373
3374static PyObject*
3375xmlparser_parse(XMLParserObject* self, PyObject* args)
3376{
3377 /* (internal) parse until end of input stream */
3378
3379 PyObject* reader;
3380 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003381 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003382 PyObject* res;
3383
3384 PyObject* fileobj;
3385 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3386 return NULL;
3387
3388 reader = PyObject_GetAttrString(fileobj, "read");
3389 if (!reader)
3390 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003391
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003392 /* read from open file object */
3393 for (;;) {
3394
3395 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3396
3397 if (!buffer) {
3398 /* read failed (e.g. due to KeyboardInterrupt) */
3399 Py_DECREF(reader);
3400 return NULL;
3401 }
3402
Eli Benderskyf996e772012-03-16 05:53:30 +02003403 if (PyUnicode_CheckExact(buffer)) {
3404 /* A unicode object is encoded into bytes using UTF-8 */
3405 if (PyUnicode_GET_SIZE(buffer) == 0) {
3406 Py_DECREF(buffer);
3407 break;
3408 }
3409 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003410 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003411 if (!temp) {
3412 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003413 Py_DECREF(reader);
3414 return NULL;
3415 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003416 buffer = temp;
3417 }
3418 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003419 Py_DECREF(buffer);
3420 break;
3421 }
3422
3423 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003424 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003425 );
3426
3427 Py_DECREF(buffer);
3428
3429 if (!res) {
3430 Py_DECREF(reader);
3431 return NULL;
3432 }
3433 Py_DECREF(res);
3434
3435 }
3436
3437 Py_DECREF(reader);
3438
3439 res = expat_parse(self, "", 0, 1);
3440
3441 if (res && TreeBuilder_CheckExact(self->target)) {
3442 Py_DECREF(res);
3443 return treebuilder_done((TreeBuilderObject*) self->target);
3444 }
3445
3446 return res;
3447}
3448
3449static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003450xmlparser_doctype(XMLParserObject *self, PyObject *args)
3451{
3452 Py_RETURN_NONE;
3453}
3454
3455static PyObject*
3456xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003457{
3458 /* activate element event reporting */
3459
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003460 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003461 TreeBuilderObject* target;
3462
3463 PyObject* events; /* event collector */
3464 PyObject* event_set = Py_None;
3465 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
3466 &event_set))
3467 return NULL;
3468
3469 if (!TreeBuilder_CheckExact(self->target)) {
3470 PyErr_SetString(
3471 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003472 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003473 "targets"
3474 );
3475 return NULL;
3476 }
3477
3478 target = (TreeBuilderObject*) self->target;
3479
3480 Py_INCREF(events);
3481 Py_XDECREF(target->events);
3482 target->events = events;
3483
3484 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003485 Py_CLEAR(target->start_event_obj);
3486 Py_CLEAR(target->end_event_obj);
3487 Py_CLEAR(target->start_ns_event_obj);
3488 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003489
3490 if (event_set == Py_None) {
3491 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003492 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003493 Py_RETURN_NONE;
3494 }
3495
3496 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3497 goto error;
3498
3499 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3500 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3501 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003502 if (PyUnicode_Check(item)) {
3503 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003504 if (event == NULL)
3505 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003506 } else if (PyBytes_Check(item))
3507 event = PyBytes_AS_STRING(item);
3508 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003509 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003510 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003511 if (strcmp(event, "start") == 0) {
3512 Py_INCREF(item);
3513 target->start_event_obj = item;
3514 } else if (strcmp(event, "end") == 0) {
3515 Py_INCREF(item);
3516 Py_XDECREF(target->end_event_obj);
3517 target->end_event_obj = item;
3518 } else if (strcmp(event, "start-ns") == 0) {
3519 Py_INCREF(item);
3520 Py_XDECREF(target->start_ns_event_obj);
3521 target->start_ns_event_obj = item;
3522 EXPAT(SetNamespaceDeclHandler)(
3523 self->parser,
3524 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3525 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3526 );
3527 } else if (strcmp(event, "end-ns") == 0) {
3528 Py_INCREF(item);
3529 Py_XDECREF(target->end_ns_event_obj);
3530 target->end_ns_event_obj = item;
3531 EXPAT(SetNamespaceDeclHandler)(
3532 self->parser,
3533 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3534 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3535 );
3536 } else {
3537 PyErr_Format(
3538 PyExc_ValueError,
3539 "unknown event '%s'", event
3540 );
3541 return NULL;
3542 }
3543 }
3544
3545 Py_RETURN_NONE;
3546
3547 error:
3548 PyErr_SetString(
3549 PyExc_TypeError,
3550 "invalid event tuple"
3551 );
3552 return NULL;
3553}
3554
3555static PyMethodDef xmlparser_methods[] = {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003556 {"feed", (PyCFunction) xmlparser_feed, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003557 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3558 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3559 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003560 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003561 {NULL, NULL}
3562};
3563
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003564static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003565xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003566{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003567 if (PyUnicode_Check(nameobj)) {
3568 PyObject* res;
3569 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3570 res = self->entity;
3571 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3572 res = self->target;
3573 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3574 return PyUnicode_FromFormat(
3575 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003576 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003577 }
3578 else
3579 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003580
Alexander Belopolskye239d232010-12-08 23:31:48 +00003581 Py_INCREF(res);
3582 return res;
3583 }
3584 generic:
3585 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003586}
3587
Neal Norwitz227b5332006-03-22 09:28:35 +00003588static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003589 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003590 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003591 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003592 (destructor)xmlparser_dealloc, /* tp_dealloc */
3593 0, /* tp_print */
3594 0, /* tp_getattr */
3595 0, /* tp_setattr */
3596 0, /* tp_reserved */
3597 0, /* tp_repr */
3598 0, /* tp_as_number */
3599 0, /* tp_as_sequence */
3600 0, /* tp_as_mapping */
3601 0, /* tp_hash */
3602 0, /* tp_call */
3603 0, /* tp_str */
3604 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3605 0, /* tp_setattro */
3606 0, /* tp_as_buffer */
3607 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3608 /* tp_flags */
3609 0, /* tp_doc */
3610 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3611 (inquiry)xmlparser_gc_clear, /* tp_clear */
3612 0, /* tp_richcompare */
3613 0, /* tp_weaklistoffset */
3614 0, /* tp_iter */
3615 0, /* tp_iternext */
3616 xmlparser_methods, /* tp_methods */
3617 0, /* tp_members */
3618 0, /* tp_getset */
3619 0, /* tp_base */
3620 0, /* tp_dict */
3621 0, /* tp_descr_get */
3622 0, /* tp_descr_set */
3623 0, /* tp_dictoffset */
3624 (initproc)xmlparser_init, /* tp_init */
3625 PyType_GenericAlloc, /* tp_alloc */
3626 xmlparser_new, /* tp_new */
3627 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003628};
3629
3630#endif
3631
3632/* ==================================================================== */
3633/* python module interface */
3634
3635static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003636 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003637 {NULL, NULL}
3638};
3639
Martin v. Löwis1a214512008-06-11 05:26:20 +00003640
3641static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003642 PyModuleDef_HEAD_INIT,
3643 "_elementtree",
3644 NULL,
3645 -1,
3646 _functions,
3647 NULL,
3648 NULL,
3649 NULL,
3650 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003651};
3652
Neal Norwitzf6657e62006-12-28 04:47:50 +00003653PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003654PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003655{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003656 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003657
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003658 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003659 if (PyType_Ready(&ElementIter_Type) < 0)
3660 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003661 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003662 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003663 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003664 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003665#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003666 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003667 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003668#endif
3669
Martin v. Löwis1a214512008-06-11 05:26:20 +00003670 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003671 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003672 return NULL;
3673
Eli Bendersky828efde2012-04-05 05:40:58 +03003674 if (!(temp = PyImport_ImportModule("copy")))
3675 return NULL;
3676 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3677 Py_XDECREF(temp);
3678
3679 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3680 return NULL;
3681
Eli Bendersky20d41742012-06-01 09:48:37 +03003682 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003683 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3684 if (expat_capi) {
3685 /* check that it's usable */
3686 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3687 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3688 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3689 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003690 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003691 PyErr_SetString(PyExc_ImportError,
3692 "pyexpat version is incompatible");
3693 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003694 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003695 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003696 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003697 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003698
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003699 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003700 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003701 );
3702 Py_INCREF(elementtree_parseerror_obj);
3703 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3704
Eli Bendersky092af1f2012-03-04 07:14:03 +02003705 Py_INCREF((PyObject *)&Element_Type);
3706 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3707
Eli Bendersky58d548d2012-05-29 15:45:16 +03003708 Py_INCREF((PyObject *)&TreeBuilder_Type);
3709 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3710
Eli Bendersky52467b12012-06-01 07:13:08 +03003711#if defined(USE_EXPAT)
3712 Py_INCREF((PyObject *)&XMLParser_Type);
3713 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3714#endif
3715
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003716 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003717}