blob: 49441cc827eaf5f5d4eabf0afa400e75a1a67066 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000053#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054
55/* -------------------------------------------------------------------- */
56/* configuration */
57
58/* Leave defined to include the expat-based XMLParser type */
59#define USE_EXPAT
60
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* An element can hold this many children without extra memory
62 allocations. */
63#define STATIC_CHILDREN 4
64
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
69
70/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010071 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000072 that the number of children should be an even number, at least on
73 32-bit platforms. */
74
75/* -------------------------------------------------------------------- */
76
/* Allocation tracing hooks.  With the "#if 0" branch enabled, ALLOC and
   RELEASE keep a running byte count in `memory` and print it together
   with the given comment; in the default branch both macros expand to
   nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
87
/* compiler tweaks: LOCAL(type) introduces a file-local function returning
   `type`.  Under MSVC it additionally asks for inlining and the __fastcall
   calling convention; with any other compiler it is plain `static`. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
94
/* macros used to store 'join' flags in string object pointers. note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ. see comments in the ElementObject definition for more
   info.

   JOIN_GET(p)        yields bit 0 of the pointer value (the join flag)
   JOIN_SET(p, flag)  yields p with bit 0 cleared and then OR-ed with flag
   JOIN_OBJ(p)        yields p with bit 0 cleared, typed as PyObject* */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000102
/* glue functions (see the init function for details) */
/* NOTE(review): these start out NULL (static storage) and are presumably
   filled in by the module init code, which is not visible here. */
static PyObject* elementtree_parseerror_obj;
/* callable invoked by deepcopy() as f(object, memo) */
static PyObject* elementtree_deepcopy_obj;
static PyObject* elementpath_obj;
107
108/* helpers */
109
110LOCAL(PyObject*)
111deepcopy(PyObject* object, PyObject* memo)
112{
113 /* do a deep copy of the given object */
114
115 PyObject* args;
116 PyObject* result;
117
118 if (!elementtree_deepcopy_obj) {
119 PyErr_SetString(
120 PyExc_RuntimeError,
121 "deepcopy helper not found"
122 );
123 return NULL;
124 }
125
Antoine Pitrouc1948842012-10-01 23:40:37 +0200126 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000127 if (!args)
128 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000129 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000131 return result;
132}
133
134LOCAL(PyObject*)
135list_join(PyObject* list)
136{
137 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 PyObject* result;
140
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 if (!joiner)
143 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200144 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200146 if (result)
147 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 return result;
149}
150
Eli Bendersky48d358b2012-05-30 17:57:50 +0300151/* Is the given object an empty dictionary?
152*/
153static int
154is_empty_dict(PyObject *obj)
155{
156 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
157}
158
159
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200161/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162
/* Optional per-element storage for the attribute dictionary and the child
   list.  An ElementObject points at one of these through its `extra`
   field; create_extra() allocates it and dealloc_extra() releases it. */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    int length; /* actual number of items */
    int allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline buffer; `children` points here right after create_extra() */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
178
/* The Element instance layout. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child. note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer. the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent. note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children; NULL until create_extra() has been called
       for this element */
    ElementObjectExtra* extra;

    PyObject *weakreflist; /* For tp_weaklistoffset */

} ElementObject;
202
/* Forward declaration; the type object is referenced by the constructors
   and argument parsers below. */
static PyTypeObject Element_Type;

/* True only when op's type is exactly Element_Type (subclasses excluded). */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000206
207/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200208/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212{
213 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
214 if (!self->extra)
215 return -1;
216
217 if (!attrib)
218 attrib = Py_None;
219
220 Py_INCREF(attrib);
221 self->extra->attrib = attrib;
222
223 self->extra->length = 0;
224 self->extra->allocated = STATIC_CHILDREN;
225 self->extra->children = self->extra->_children;
226
227 return 0;
228}
229
230LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
Eli Bendersky08b85292012-04-04 15:55:07 +0300233 ElementObjectExtra *myextra;
234 int i;
235
Eli Benderskyebf37a22012-04-03 22:02:37 +0300236 if (!self->extra)
237 return;
238
239 /* Avoid DECREFs calling into this code again (cycles, etc.)
240 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300241 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300242 self->extra = NULL;
243
244 Py_DECREF(myextra->attrib);
245
Eli Benderskyebf37a22012-04-03 22:02:37 +0300246 for (i = 0; i < myextra->length; i++)
247 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000248
Eli Benderskyebf37a22012-04-03 22:02:37 +0300249 if (myextra->children != myextra->_children)
250 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251
Eli Benderskyebf37a22012-04-03 22:02:37 +0300252 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000253}
254
Eli Bendersky092af1f2012-03-04 07:14:03 +0200255/* Convenience internal function to create new Element objects with the given
256 * tag and attributes.
257*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200259create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000260{
261 ElementObject* self;
262
Eli Bendersky0192ba32012-03-30 16:38:33 +0300263 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 if (self == NULL)
265 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 self->extra = NULL;
267
Eli Bendersky48d358b2012-05-30 17:57:50 +0300268 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200269 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000270 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000272 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273 }
274
275 Py_INCREF(tag);
276 self->tag = tag;
277
278 Py_INCREF(Py_None);
279 self->text = Py_None;
280
281 Py_INCREF(Py_None);
282 self->tail = Py_None;
283
Eli Benderskyebf37a22012-04-03 22:02:37 +0300284 self->weakreflist = NULL;
285
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300287 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 return (PyObject*) self;
289}
290
Eli Bendersky092af1f2012-03-04 07:14:03 +0200291static PyObject *
292element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
293{
294 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
295 if (e != NULL) {
296 Py_INCREF(Py_None);
297 e->tag = Py_None;
298
299 Py_INCREF(Py_None);
300 e->text = Py_None;
301
302 Py_INCREF(Py_None);
303 e->tail = Py_None;
304
305 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300306 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307 }
308 return (PyObject *)e;
309}
310
Eli Bendersky737b1732012-05-29 06:02:56 +0300311/* Helper function for extracting the attrib dictionary from a keywords dict.
312 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800313 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300314 * directly into *kwds.
315 * If there is no 'attrib' keyword, return an empty dict.
316 */
317static PyObject*
318get_attrib_from_keywords(PyObject *kwds)
319{
320 PyObject *attrib_str = PyUnicode_FromString("attrib");
321 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
322
323 if (attrib) {
324 /* If attrib was found in kwds, copy its value and remove it from
325 * kwds
326 */
327 if (!PyDict_Check(attrib)) {
328 Py_DECREF(attrib_str);
329 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
330 Py_TYPE(attrib)->tp_name);
331 return NULL;
332 }
333 attrib = PyDict_Copy(attrib);
334 PyDict_DelItem(kwds, attrib_str);
335 } else {
336 attrib = PyDict_New();
337 }
338
339 Py_DECREF(attrib_str);
340
341 if (attrib)
342 PyDict_Update(attrib, kwds);
343 return attrib;
344}
345
Eli Bendersky092af1f2012-03-04 07:14:03 +0200346static int
347element_init(PyObject *self, PyObject *args, PyObject *kwds)
348{
349 PyObject *tag;
350 PyObject *tmp;
351 PyObject *attrib = NULL;
352 ElementObject *self_elem;
353
354 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
355 return -1;
356
Eli Bendersky737b1732012-05-29 06:02:56 +0300357 if (attrib) {
358 /* attrib passed as positional arg */
359 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200360 if (!attrib)
361 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300362 if (kwds) {
363 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200364 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300365 return -1;
366 }
367 }
368 } else if (kwds) {
369 /* have keywords args */
370 attrib = get_attrib_from_keywords(kwds);
371 if (!attrib)
372 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200373 }
374
375 self_elem = (ElementObject *)self;
376
Antoine Pitrouc1948842012-10-01 23:40:37 +0200377 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200378 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200379 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200380 return -1;
381 }
382 }
383
Eli Bendersky48d358b2012-05-30 17:57:50 +0300384 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200385 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200386
387 /* Replace the objects already pointed to by tag, text and tail. */
388 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 Py_DECREF(tmp);
392
393 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200394 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200395 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200396 Py_DECREF(JOIN_OBJ(tmp));
397
398 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200400 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401 Py_DECREF(JOIN_OBJ(tmp));
402
403 return 0;
404}
405
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000406LOCAL(int)
407element_resize(ElementObject* self, int extra)
408{
409 int size;
410 PyObject* *children;
411
412 /* make sure self->children can hold the given number of extra
413 elements. set an exception and return -1 if allocation failed */
414
415 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417
418 size = self->extra->length + extra;
419
420 if (size > self->extra->allocated) {
421 /* use Python 2.4's list growth strategy */
422 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000423 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100424 * which needs at least 4 bytes.
425 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000426 * be safe.
427 */
428 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000429 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000430 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100431 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000432 * false alarm always assume at least one child to be safe.
433 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000434 children = PyObject_Realloc(self->extra->children,
435 size * sizeof(PyObject*));
436 if (!children)
437 goto nomemory;
438 } else {
439 children = PyObject_Malloc(size * sizeof(PyObject*));
440 if (!children)
441 goto nomemory;
442 /* copy existing children from static area to malloc buffer */
443 memcpy(children, self->extra->children,
444 self->extra->length * sizeof(PyObject*));
445 }
446 self->extra->children = children;
447 self->extra->allocated = size;
448 }
449
450 return 0;
451
452 nomemory:
453 PyErr_NoMemory();
454 return -1;
455}
456
457LOCAL(int)
458element_add_subelement(ElementObject* self, PyObject* element)
459{
460 /* add a child element to a parent */
461
462 if (element_resize(self, 1) < 0)
463 return -1;
464
465 Py_INCREF(element);
466 self->extra->children[self->extra->length] = element;
467
468 self->extra->length++;
469
470 return 0;
471}
472
473LOCAL(PyObject*)
474element_get_attrib(ElementObject* self)
475{
476 /* return borrowed reference to attrib dictionary */
477 /* note: this function assumes that the extra section exists */
478
479 PyObject* res = self->extra->attrib;
480
481 if (res == Py_None) {
482 /* create missing dictionary */
483 res = PyDict_New();
484 if (!res)
485 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200486 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000487 self->extra->attrib = res;
488 }
489
490 return res;
491}
492
493LOCAL(PyObject*)
494element_get_text(ElementObject* self)
495{
496 /* return borrowed reference to text attribute */
497
498 PyObject* res = self->text;
499
500 if (JOIN_GET(res)) {
501 res = JOIN_OBJ(res);
502 if (PyList_CheckExact(res)) {
503 res = list_join(res);
504 if (!res)
505 return NULL;
506 self->text = res;
507 }
508 }
509
510 return res;
511}
512
513LOCAL(PyObject*)
514element_get_tail(ElementObject* self)
515{
516 /* return borrowed reference to text attribute */
517
518 PyObject* res = self->tail;
519
520 if (JOIN_GET(res)) {
521 res = JOIN_OBJ(res);
522 if (PyList_CheckExact(res)) {
523 res = list_join(res);
524 if (!res)
525 return NULL;
526 self->tail = res;
527 }
528 }
529
530 return res;
531}
532
533static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300534subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000535{
536 PyObject* elem;
537
538 ElementObject* parent;
539 PyObject* tag;
540 PyObject* attrib = NULL;
541 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
542 &Element_Type, &parent, &tag,
543 &PyDict_Type, &attrib))
544 return NULL;
545
Eli Bendersky737b1732012-05-29 06:02:56 +0300546 if (attrib) {
547 /* attrib passed as positional arg */
548 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000549 if (!attrib)
550 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300551 if (kwds) {
552 if (PyDict_Update(attrib, kwds) < 0) {
553 return NULL;
554 }
555 }
556 } else if (kwds) {
557 /* have keyword args */
558 attrib = get_attrib_from_keywords(kwds);
559 if (!attrib)
560 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300562 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 Py_INCREF(Py_None);
564 attrib = Py_None;
565 }
566
Eli Bendersky092af1f2012-03-04 07:14:03 +0200567 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000568
569 Py_DECREF(attrib);
570
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000571 if (element_add_subelement(parent, elem) < 0) {
572 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000574 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000575
576 return elem;
577}
578
Eli Bendersky0192ba32012-03-30 16:38:33 +0300579static int
580element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
581{
582 Py_VISIT(self->tag);
583 Py_VISIT(JOIN_OBJ(self->text));
584 Py_VISIT(JOIN_OBJ(self->tail));
585
586 if (self->extra) {
587 int i;
588 Py_VISIT(self->extra->attrib);
589
590 for (i = 0; i < self->extra->length; ++i)
591 Py_VISIT(self->extra->children[i]);
592 }
593 return 0;
594}
595
596static int
597element_gc_clear(ElementObject *self)
598{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300599 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300600
601 /* The following is like Py_CLEAR for self->text and self->tail, but
602 * written explicitily because the real pointers hide behind access
603 * macros.
604 */
605 if (self->text) {
606 PyObject *tmp = JOIN_OBJ(self->text);
607 self->text = NULL;
608 Py_DECREF(tmp);
609 }
610
611 if (self->tail) {
612 PyObject *tmp = JOIN_OBJ(self->tail);
613 self->tail = NULL;
614 Py_DECREF(tmp);
615 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616
617 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300618 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300619 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300620 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300621 return 0;
622}
623
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624static void
625element_dealloc(ElementObject* self)
626{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300627 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300628
629 if (self->weakreflist != NULL)
630 PyObject_ClearWeakRefs((PyObject *) self);
631
Eli Bendersky0192ba32012-03-30 16:38:33 +0300632 /* element_gc_clear clears all references and deallocates extra
633 */
634 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000635
636 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200637 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000638}
639
640/* -------------------------------------------------------------------- */
641/* methods (in alphabetical order) */
642
643static PyObject*
644element_append(ElementObject* self, PyObject* args)
645{
646 PyObject* element;
647 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
648 return NULL;
649
650 if (element_add_subelement(self, element) < 0)
651 return NULL;
652
653 Py_RETURN_NONE;
654}
655
656static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300657element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000658{
659 if (!PyArg_ParseTuple(args, ":clear"))
660 return NULL;
661
Eli Benderskyebf37a22012-04-03 22:02:37 +0300662 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663
664 Py_INCREF(Py_None);
665 Py_DECREF(JOIN_OBJ(self->text));
666 self->text = Py_None;
667
668 Py_INCREF(Py_None);
669 Py_DECREF(JOIN_OBJ(self->tail));
670 self->tail = Py_None;
671
672 Py_RETURN_NONE;
673}
674
675static PyObject*
676element_copy(ElementObject* self, PyObject* args)
677{
678 int i;
679 ElementObject* element;
680
681 if (!PyArg_ParseTuple(args, ":__copy__"))
682 return NULL;
683
Eli Bendersky092af1f2012-03-04 07:14:03 +0200684 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000685 self->tag, (self->extra) ? self->extra->attrib : Py_None
686 );
687 if (!element)
688 return NULL;
689
690 Py_DECREF(JOIN_OBJ(element->text));
691 element->text = self->text;
692 Py_INCREF(JOIN_OBJ(element->text));
693
694 Py_DECREF(JOIN_OBJ(element->tail));
695 element->tail = self->tail;
696 Py_INCREF(JOIN_OBJ(element->tail));
697
698 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100699
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000700 if (element_resize(element, self->extra->length) < 0) {
701 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000702 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000703 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704
705 for (i = 0; i < self->extra->length; i++) {
706 Py_INCREF(self->extra->children[i]);
707 element->extra->children[i] = self->extra->children[i];
708 }
709
710 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100711
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000712 }
713
714 return (PyObject*) element;
715}
716
717static PyObject*
718element_deepcopy(ElementObject* self, PyObject* args)
719{
720 int i;
721 ElementObject* element;
722 PyObject* tag;
723 PyObject* attrib;
724 PyObject* text;
725 PyObject* tail;
726 PyObject* id;
727
728 PyObject* memo;
729 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
730 return NULL;
731
732 tag = deepcopy(self->tag, memo);
733 if (!tag)
734 return NULL;
735
736 if (self->extra) {
737 attrib = deepcopy(self->extra->attrib, memo);
738 if (!attrib) {
739 Py_DECREF(tag);
740 return NULL;
741 }
742 } else {
743 Py_INCREF(Py_None);
744 attrib = Py_None;
745 }
746
Eli Bendersky092af1f2012-03-04 07:14:03 +0200747 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
749 Py_DECREF(tag);
750 Py_DECREF(attrib);
751
752 if (!element)
753 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100754
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755 text = deepcopy(JOIN_OBJ(self->text), memo);
756 if (!text)
757 goto error;
758 Py_DECREF(element->text);
759 element->text = JOIN_SET(text, JOIN_GET(self->text));
760
761 tail = deepcopy(JOIN_OBJ(self->tail), memo);
762 if (!tail)
763 goto error;
764 Py_DECREF(element->tail);
765 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
766
767 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100768
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000769 if (element_resize(element, self->extra->length) < 0)
770 goto error;
771
772 for (i = 0; i < self->extra->length; i++) {
773 PyObject* child = deepcopy(self->extra->children[i], memo);
774 if (!child) {
775 element->extra->length = i;
776 goto error;
777 }
778 element->extra->children[i] = child;
779 }
780
781 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100782
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000783 }
784
785 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200786 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000787 if (!id)
788 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789
790 i = PyDict_SetItem(memo, id, (PyObject*) element);
791
792 Py_DECREF(id);
793
794 if (i < 0)
795 goto error;
796
797 return (PyObject*) element;
798
799 error:
800 Py_DECREF(element);
801 return NULL;
802}
803
Martin v. Löwisbce16662012-06-17 10:41:22 +0200804static PyObject*
805element_sizeof(PyObject* _self, PyObject* args)
806{
807 ElementObject *self = (ElementObject*)_self;
808 Py_ssize_t result = sizeof(ElementObject);
809 if (self->extra) {
810 result += sizeof(ElementObjectExtra);
811 if (self->extra->children != self->extra->_children)
812 result += sizeof(PyObject*) * self->extra->allocated;
813 }
814 return PyLong_FromSsize_t(result);
815}
816
/* dict keys for getstate/setstate.  element_getstate() builds its state
   dictionary with exactly these keys. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
823
824/* __getstate__ returns a fabricated instance dict as in the pure-Python
825 * Element implementation, for interoperability/interchangeability. This
826 * makes the pure-Python implementation details an API, but (a) there aren't
827 * any unnecessary structures there; and (b) it buys compatibility with 3.2
828 * pickles. See issue #16076.
829 */
830static PyObject *
831element_getstate(ElementObject *self)
832{
833 int i, noattrib;
834 PyObject *instancedict = NULL, *children;
835
836 /* Build a list of children. */
837 children = PyList_New(self->extra ? self->extra->length : 0);
838 if (!children)
839 return NULL;
840 for (i = 0; i < PyList_GET_SIZE(children); i++) {
841 PyObject *child = self->extra->children[i];
842 Py_INCREF(child);
843 PyList_SET_ITEM(children, i, child);
844 }
845
846 /* Construct the state object. */
847 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
848 if (noattrib)
849 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
850 PICKLED_TAG, self->tag,
851 PICKLED_CHILDREN, children,
852 PICKLED_ATTRIB,
853 PICKLED_TEXT, self->text,
854 PICKLED_TAIL, self->tail);
855 else
856 instancedict = Py_BuildValue("{sOsOsOsOsO}",
857 PICKLED_TAG, self->tag,
858 PICKLED_CHILDREN, children,
859 PICKLED_ATTRIB, self->extra->attrib,
860 PICKLED_TEXT, self->text,
861 PICKLED_TAIL, self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800862 if (instancedict) {
863 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800864 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800865 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800866 else {
867 for (i = 0; i < PyList_GET_SIZE(children); i++)
868 Py_DECREF(PyList_GET_ITEM(children, i));
869 Py_DECREF(children);
870
871 return NULL;
872 }
873}
874
875static PyObject *
876element_setstate_from_attributes(ElementObject *self,
877 PyObject *tag,
878 PyObject *attrib,
879 PyObject *text,
880 PyObject *tail,
881 PyObject *children)
882{
883 Py_ssize_t i, nchildren;
884
885 if (!tag) {
886 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
887 return NULL;
888 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800889
890 Py_CLEAR(self->tag);
891 self->tag = tag;
892 Py_INCREF(self->tag);
893
894 Py_CLEAR(self->text);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800895 self->text = text ? text : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800896 Py_INCREF(self->text);
897
898 Py_CLEAR(self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800899 self->tail = tail ? tail : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800900 Py_INCREF(self->tail);
901
902 /* Handle ATTRIB and CHILDREN. */
903 if (!children && !attrib)
904 Py_RETURN_NONE;
905
906 /* Compute 'nchildren'. */
907 if (children) {
908 if (!PyList_Check(children)) {
909 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
910 return NULL;
911 }
912 nchildren = PyList_Size(children);
913 }
914 else {
915 nchildren = 0;
916 }
917
918 /* Allocate 'extra'. */
919 if (element_resize(self, nchildren)) {
920 return NULL;
921 }
922 assert(self->extra && self->extra->allocated >= nchildren);
923
924 /* Copy children */
925 for (i = 0; i < nchildren; i++) {
926 self->extra->children[i] = PyList_GET_ITEM(children, i);
927 Py_INCREF(self->extra->children[i]);
928 }
929
930 self->extra->length = nchildren;
931 self->extra->allocated = nchildren;
932
933 /* Stash attrib. */
934 if (attrib) {
935 Py_CLEAR(self->extra->attrib);
936 self->extra->attrib = attrib;
937 Py_INCREF(attrib);
938 }
939
940 Py_RETURN_NONE;
941}
942
943/* __setstate__ for Element instance from the Python implementation.
944 * 'state' should be the instance dict.
945 */
946static PyObject *
947element_setstate_from_Python(ElementObject *self, PyObject *state)
948{
949 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
950 PICKLED_TAIL, PICKLED_CHILDREN, 0};
951 PyObject *args;
952 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800953 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800954
Eli Bendersky698bdb22013-01-10 06:01:06 -0800955 tag = attrib = text = tail = children = NULL;
956 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800957 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800958 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800959
960 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
961 &attrib, &text, &tail, &children))
962 retval = element_setstate_from_attributes(self, tag, attrib, text,
963 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800964 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800965 retval = NULL;
966
967 Py_DECREF(args);
968 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800969}
970
971static PyObject *
972element_setstate(ElementObject *self, PyObject *state)
973{
974 if (!PyDict_CheckExact(state)) {
975 PyErr_Format(PyExc_TypeError,
976 "Don't know how to unpickle \"%.200R\" as an Element",
977 state);
978 return NULL;
979 }
980 else
981 return element_setstate_from_Python(self, state);
982}
983
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000984LOCAL(int)
985checkpath(PyObject* tag)
986{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000987 Py_ssize_t i;
988 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000989
990 /* check if a tag contains an xpath character */
991
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000992#define PATHCHAR(ch) \
993 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000994
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000995 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200996 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
997 void *data = PyUnicode_DATA(tag);
998 unsigned int kind = PyUnicode_KIND(tag);
999 for (i = 0; i < len; i++) {
1000 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1001 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001002 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001003 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001004 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001006 return 1;
1007 }
1008 return 0;
1009 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001010 if (PyBytes_Check(tag)) {
1011 char *p = PyBytes_AS_STRING(tag);
1012 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001013 if (p[i] == '{')
1014 check = 0;
1015 else if (p[i] == '}')
1016 check = 1;
1017 else if (check && PATHCHAR(p[i]))
1018 return 1;
1019 }
1020 return 0;
1021 }
1022
1023 return 1; /* unknown type; might be path expression */
1024}
1025
1026static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001027element_extend(ElementObject* self, PyObject* args)
1028{
1029 PyObject* seq;
1030 Py_ssize_t i, seqlen = 0;
1031
1032 PyObject* seq_in;
1033 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1034 return NULL;
1035
1036 seq = PySequence_Fast(seq_in, "");
1037 if (!seq) {
1038 PyErr_Format(
1039 PyExc_TypeError,
1040 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1041 );
1042 return NULL;
1043 }
1044
1045 seqlen = PySequence_Size(seq);
1046 for (i = 0; i < seqlen; i++) {
1047 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001048 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1049 Py_DECREF(seq);
1050 PyErr_Format(
1051 PyExc_TypeError,
1052 "expected an Element, not \"%.200s\"",
1053 Py_TYPE(element)->tp_name);
1054 return NULL;
1055 }
1056
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001057 if (element_add_subelement(self, element) < 0) {
1058 Py_DECREF(seq);
1059 return NULL;
1060 }
1061 }
1062
1063 Py_DECREF(seq);
1064
1065 Py_RETURN_NONE;
1066}
1067
1068static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001069element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001070{
1071 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001072 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001073 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001074 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001075
Eli Bendersky737b1732012-05-29 06:02:56 +03001076 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1077 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001078 return NULL;
1079
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001080 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001081 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001082 return _PyObject_CallMethodId(
1083 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001084 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001085 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086
1087 if (!self->extra)
1088 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001089
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001090 for (i = 0; i < self->extra->length; i++) {
1091 PyObject* item = self->extra->children[i];
1092 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001093 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001094 Py_INCREF(item);
1095 return item;
1096 }
1097 }
1098
1099 Py_RETURN_NONE;
1100}
1101
1102static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001103element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001104{
1105 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001106 PyObject* tag;
1107 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001108 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001109 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001110 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001111
Eli Bendersky737b1732012-05-29 06:02:56 +03001112 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1113 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001114 return NULL;
1115
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001116 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001117 return _PyObject_CallMethodId(
1118 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001119 );
1120
1121 if (!self->extra) {
1122 Py_INCREF(default_value);
1123 return default_value;
1124 }
1125
1126 for (i = 0; i < self->extra->length; i++) {
1127 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +00001128 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
1129
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001130 PyObject* text = element_get_text(item);
1131 if (text == Py_None)
Eli Bendersky25771b32013-01-13 05:26:07 -08001132 return PyUnicode_New(0, 0);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001133 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001134 return text;
1135 }
1136 }
1137
1138 Py_INCREF(default_value);
1139 return default_value;
1140}
1141
1142static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001143element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001144{
1145 int i;
1146 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001148 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001149 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001150
Eli Bendersky737b1732012-05-29 06:02:56 +03001151 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1152 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001153 return NULL;
1154
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001155 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001156 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001157 return _PyObject_CallMethodId(
1158 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001159 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001160 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001161
1162 out = PyList_New(0);
1163 if (!out)
1164 return NULL;
1165
1166 if (!self->extra)
1167 return out;
1168
1169 for (i = 0; i < self->extra->length; i++) {
1170 PyObject* item = self->extra->children[i];
1171 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001172 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001173 if (PyList_Append(out, item) < 0) {
1174 Py_DECREF(out);
1175 return NULL;
1176 }
1177 }
1178 }
1179
1180 return out;
1181}
1182
1183static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001184element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001185{
1186 PyObject* tag;
1187 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001188 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001189 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001190
Eli Bendersky737b1732012-05-29 06:02:56 +03001191 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1192 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001193 return NULL;
1194
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001195 return _PyObject_CallMethodId(
1196 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001197 );
1198}
1199
1200static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001201element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001202{
1203 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001204 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001205
1206 PyObject* key;
1207 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001208
1209 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1210 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001211 return NULL;
1212
1213 if (!self->extra || self->extra->attrib == Py_None)
1214 value = default_value;
1215 else {
1216 value = PyDict_GetItem(self->extra->attrib, key);
1217 if (!value)
1218 value = default_value;
1219 }
1220
1221 Py_INCREF(value);
1222 return value;
1223}
1224
1225static PyObject*
1226element_getchildren(ElementObject* self, PyObject* args)
1227{
1228 int i;
1229 PyObject* list;
1230
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001231 /* FIXME: report as deprecated? */
1232
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001233 if (!PyArg_ParseTuple(args, ":getchildren"))
1234 return NULL;
1235
1236 if (!self->extra)
1237 return PyList_New(0);
1238
1239 list = PyList_New(self->extra->length);
1240 if (!list)
1241 return NULL;
1242
1243 for (i = 0; i < self->extra->length; i++) {
1244 PyObject* item = self->extra->children[i];
1245 Py_INCREF(item);
1246 PyList_SET_ITEM(list, i, item);
1247 }
1248
1249 return list;
1250}
1251
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001252
Eli Bendersky64d11e62012-06-15 07:42:50 +03001253static PyObject *
1254create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1255
1256
1257static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001258element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001259{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001260 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001261 static char* kwlist[] = {"tag", 0};
1262
1263 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001264 return NULL;
1265
Eli Bendersky64d11e62012-06-15 07:42:50 +03001266 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001267}
1268
1269
1270static PyObject*
1271element_itertext(ElementObject* self, PyObject* args)
1272{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001273 if (!PyArg_ParseTuple(args, ":itertext"))
1274 return NULL;
1275
Eli Bendersky64d11e62012-06-15 07:42:50 +03001276 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277}
1278
Eli Bendersky64d11e62012-06-15 07:42:50 +03001279
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001280static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001281element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001283 ElementObject* self = (ElementObject*) self_;
1284
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001285 if (!self->extra || index < 0 || index >= self->extra->length) {
1286 PyErr_SetString(
1287 PyExc_IndexError,
1288 "child index out of range"
1289 );
1290 return NULL;
1291 }
1292
1293 Py_INCREF(self->extra->children[index]);
1294 return self->extra->children[index];
1295}
1296
1297static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001298element_insert(ElementObject* self, PyObject* args)
1299{
1300 int i;
1301
1302 int index;
1303 PyObject* element;
1304 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1305 &Element_Type, &element))
1306 return NULL;
1307
1308 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001309 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001310
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001311 if (index < 0) {
1312 index += self->extra->length;
1313 if (index < 0)
1314 index = 0;
1315 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001316 if (index > self->extra->length)
1317 index = self->extra->length;
1318
1319 if (element_resize(self, 1) < 0)
1320 return NULL;
1321
1322 for (i = self->extra->length; i > index; i--)
1323 self->extra->children[i] = self->extra->children[i-1];
1324
1325 Py_INCREF(element);
1326 self->extra->children[index] = element;
1327
1328 self->extra->length++;
1329
1330 Py_RETURN_NONE;
1331}
1332
1333static PyObject*
1334element_items(ElementObject* self, PyObject* args)
1335{
1336 if (!PyArg_ParseTuple(args, ":items"))
1337 return NULL;
1338
1339 if (!self->extra || self->extra->attrib == Py_None)
1340 return PyList_New(0);
1341
1342 return PyDict_Items(self->extra->attrib);
1343}
1344
1345static PyObject*
1346element_keys(ElementObject* self, PyObject* args)
1347{
1348 if (!PyArg_ParseTuple(args, ":keys"))
1349 return NULL;
1350
1351 if (!self->extra || self->extra->attrib == Py_None)
1352 return PyList_New(0);
1353
1354 return PyDict_Keys(self->extra->attrib);
1355}
1356
Martin v. Löwis18e16552006-02-15 17:27:45 +00001357static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001358element_length(ElementObject* self)
1359{
1360 if (!self->extra)
1361 return 0;
1362
1363 return self->extra->length;
1364}
1365
1366static PyObject*
1367element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1368{
1369 PyObject* elem;
1370
1371 PyObject* tag;
1372 PyObject* attrib;
1373 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1374 return NULL;
1375
1376 attrib = PyDict_Copy(attrib);
1377 if (!attrib)
1378 return NULL;
1379
Eli Bendersky092af1f2012-03-04 07:14:03 +02001380 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001381
1382 Py_DECREF(attrib);
1383
1384 return elem;
1385}
1386
1387static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001388element_remove(ElementObject* self, PyObject* args)
1389{
1390 int i;
1391
1392 PyObject* element;
1393 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1394 return NULL;
1395
1396 if (!self->extra) {
1397 /* element has no children, so raise exception */
1398 PyErr_SetString(
1399 PyExc_ValueError,
1400 "list.remove(x): x not in list"
1401 );
1402 return NULL;
1403 }
1404
1405 for (i = 0; i < self->extra->length; i++) {
1406 if (self->extra->children[i] == element)
1407 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001408 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001409 break;
1410 }
1411
1412 if (i == self->extra->length) {
1413 /* element is not in children, so raise exception */
1414 PyErr_SetString(
1415 PyExc_ValueError,
1416 "list.remove(x): x not in list"
1417 );
1418 return NULL;
1419 }
1420
1421 Py_DECREF(self->extra->children[i]);
1422
1423 self->extra->length--;
1424
1425 for (; i < self->extra->length; i++)
1426 self->extra->children[i] = self->extra->children[i+1];
1427
1428 Py_RETURN_NONE;
1429}
1430
1431static PyObject*
1432element_repr(ElementObject* self)
1433{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001434 if (self->tag)
1435 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1436 else
1437 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001438}
1439
1440static PyObject*
1441element_set(ElementObject* self, PyObject* args)
1442{
1443 PyObject* attrib;
1444
1445 PyObject* key;
1446 PyObject* value;
1447 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1448 return NULL;
1449
1450 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001451 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001452
1453 attrib = element_get_attrib(self);
1454 if (!attrib)
1455 return NULL;
1456
1457 if (PyDict_SetItem(attrib, key, value) < 0)
1458 return NULL;
1459
1460 Py_RETURN_NONE;
1461}
1462
1463static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001464element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001465{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001466 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001467 int i;
1468 PyObject* old;
1469
1470 if (!self->extra || index < 0 || index >= self->extra->length) {
1471 PyErr_SetString(
1472 PyExc_IndexError,
1473 "child assignment index out of range");
1474 return -1;
1475 }
1476
1477 old = self->extra->children[index];
1478
1479 if (item) {
1480 Py_INCREF(item);
1481 self->extra->children[index] = item;
1482 } else {
1483 self->extra->length--;
1484 for (i = index; i < self->extra->length; i++)
1485 self->extra->children[i] = self->extra->children[i+1];
1486 }
1487
1488 Py_DECREF(old);
1489
1490 return 0;
1491}
1492
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001493static PyObject*
1494element_subscr(PyObject* self_, PyObject* item)
1495{
1496 ElementObject* self = (ElementObject*) self_;
1497
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001498 if (PyIndex_Check(item)) {
1499 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001500
1501 if (i == -1 && PyErr_Occurred()) {
1502 return NULL;
1503 }
1504 if (i < 0 && self->extra)
1505 i += self->extra->length;
1506 return element_getitem(self_, i);
1507 }
1508 else if (PySlice_Check(item)) {
1509 Py_ssize_t start, stop, step, slicelen, cur, i;
1510 PyObject* list;
1511
1512 if (!self->extra)
1513 return PyList_New(0);
1514
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001515 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001516 self->extra->length,
1517 &start, &stop, &step, &slicelen) < 0) {
1518 return NULL;
1519 }
1520
1521 if (slicelen <= 0)
1522 return PyList_New(0);
1523 else {
1524 list = PyList_New(slicelen);
1525 if (!list)
1526 return NULL;
1527
1528 for (cur = start, i = 0; i < slicelen;
1529 cur += step, i++) {
1530 PyObject* item = self->extra->children[cur];
1531 Py_INCREF(item);
1532 PyList_SET_ITEM(list, i, item);
1533 }
1534
1535 return list;
1536 }
1537 }
1538 else {
1539 PyErr_SetString(PyExc_TypeError,
1540 "element indices must be integers");
1541 return NULL;
1542 }
1543}
1544
1545static int
1546element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1547{
1548 ElementObject* self = (ElementObject*) self_;
1549
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001550 if (PyIndex_Check(item)) {
1551 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001552
1553 if (i == -1 && PyErr_Occurred()) {
1554 return -1;
1555 }
1556 if (i < 0 && self->extra)
1557 i += self->extra->length;
1558 return element_setitem(self_, i, value);
1559 }
1560 else if (PySlice_Check(item)) {
1561 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1562
1563 PyObject* recycle = NULL;
1564 PyObject* seq = NULL;
1565
1566 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001567 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001568
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001569 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001570 self->extra->length,
1571 &start, &stop, &step, &slicelen) < 0) {
1572 return -1;
1573 }
1574
Eli Bendersky865756a2012-03-09 13:38:15 +02001575 if (value == NULL) {
1576 /* Delete slice */
1577 size_t cur;
1578 Py_ssize_t i;
1579
1580 if (slicelen <= 0)
1581 return 0;
1582
1583 /* Since we're deleting, the direction of the range doesn't matter,
1584 * so for simplicity make it always ascending.
1585 */
1586 if (step < 0) {
1587 stop = start + 1;
1588 start = stop + step * (slicelen - 1) - 1;
1589 step = -step;
1590 }
1591
1592 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1593
1594 /* recycle is a list that will contain all the children
1595 * scheduled for removal.
1596 */
1597 if (!(recycle = PyList_New(slicelen))) {
1598 PyErr_NoMemory();
1599 return -1;
1600 }
1601
1602 /* This loop walks over all the children that have to be deleted,
1603 * with cur pointing at them. num_moved is the amount of children
1604 * until the next deleted child that have to be "shifted down" to
1605 * occupy the deleted's places.
1606 * Note that in the ith iteration, shifting is done i+i places down
1607 * because i children were already removed.
1608 */
1609 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1610 /* Compute how many children have to be moved, clipping at the
1611 * list end.
1612 */
1613 Py_ssize_t num_moved = step - 1;
1614 if (cur + step >= (size_t)self->extra->length) {
1615 num_moved = self->extra->length - cur - 1;
1616 }
1617
1618 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1619
1620 memmove(
1621 self->extra->children + cur - i,
1622 self->extra->children + cur + 1,
1623 num_moved * sizeof(PyObject *));
1624 }
1625
1626 /* Leftover "tail" after the last removed child */
1627 cur = start + (size_t)slicelen * step;
1628 if (cur < (size_t)self->extra->length) {
1629 memmove(
1630 self->extra->children + cur - slicelen,
1631 self->extra->children + cur,
1632 (self->extra->length - cur) * sizeof(PyObject *));
1633 }
1634
1635 self->extra->length -= slicelen;
1636
1637 /* Discard the recycle list with all the deleted sub-elements */
1638 Py_XDECREF(recycle);
1639 return 0;
1640 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001641 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001642 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001643 seq = PySequence_Fast(value, "");
1644 if (!seq) {
1645 PyErr_Format(
1646 PyExc_TypeError,
1647 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1648 );
1649 return -1;
1650 }
1651 newlen = PySequence_Size(seq);
1652 }
1653
1654 if (step != 1 && newlen != slicelen)
1655 {
1656 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001657 "attempt to assign sequence of size %zd "
1658 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001659 newlen, slicelen
1660 );
1661 return -1;
1662 }
1663
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001664 /* Resize before creating the recycle bin, to prevent refleaks. */
1665 if (newlen > slicelen) {
1666 if (element_resize(self, newlen - slicelen) < 0) {
1667 if (seq) {
1668 Py_DECREF(seq);
1669 }
1670 return -1;
1671 }
1672 }
1673
1674 if (slicelen > 0) {
1675 /* to avoid recursive calls to this method (via decref), move
1676 old items to the recycle bin here, and get rid of them when
1677 we're done modifying the element */
1678 recycle = PyList_New(slicelen);
1679 if (!recycle) {
1680 if (seq) {
1681 Py_DECREF(seq);
1682 }
1683 return -1;
1684 }
1685 for (cur = start, i = 0; i < slicelen;
1686 cur += step, i++)
1687 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1688 }
1689
1690 if (newlen < slicelen) {
1691 /* delete slice */
1692 for (i = stop; i < self->extra->length; i++)
1693 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1694 } else if (newlen > slicelen) {
1695 /* insert slice */
1696 for (i = self->extra->length-1; i >= stop; i--)
1697 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1698 }
1699
1700 /* replace the slice */
1701 for (cur = start, i = 0; i < newlen;
1702 cur += step, i++) {
1703 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1704 Py_INCREF(element);
1705 self->extra->children[cur] = element;
1706 }
1707
1708 self->extra->length += newlen - slicelen;
1709
1710 if (seq) {
1711 Py_DECREF(seq);
1712 }
1713
1714 /* discard the recycle bin, and everything in it */
1715 Py_XDECREF(recycle);
1716
1717 return 0;
1718 }
1719 else {
1720 PyErr_SetString(PyExc_TypeError,
1721 "element indices must be integers");
1722 return -1;
1723 }
1724}
1725
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001726static PyMethodDef element_methods[] = {
1727
Eli Bendersky0192ba32012-03-30 16:38:33 +03001728 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001729
Eli Benderskya8736902013-01-05 06:26:39 -08001730 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001731 {"set", (PyCFunction) element_set, METH_VARARGS},
1732
Eli Bendersky737b1732012-05-29 06:02:56 +03001733 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1734 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1735 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001736
1737 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001738 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001739 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1740 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1741
Eli Benderskya8736902013-01-05 06:26:39 -08001742 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001743 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001744 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001745
Eli Benderskya8736902013-01-05 06:26:39 -08001746 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001747 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1748
1749 {"items", (PyCFunction) element_items, METH_VARARGS},
1750 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1751
1752 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1753
1754 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1755 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001756 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001757 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1758 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001759
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001760 {NULL, NULL}
1761};
1762
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001763static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001764element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001765{
1766 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001767 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001768
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001769 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001770 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001771
Alexander Belopolskye239d232010-12-08 23:31:48 +00001772 if (name == NULL)
1773 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001774
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001775 /* handle common attributes first */
1776 if (strcmp(name, "tag") == 0) {
1777 res = self->tag;
1778 Py_INCREF(res);
1779 return res;
1780 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001781 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001782 Py_INCREF(res);
1783 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001784 }
1785
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001786 /* methods */
1787 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1788 if (res)
1789 return res;
1790
1791 /* less common attributes */
1792 if (strcmp(name, "tail") == 0) {
1793 PyErr_Clear();
1794 res = element_get_tail(self);
1795 } else if (strcmp(name, "attrib") == 0) {
1796 PyErr_Clear();
1797 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001798 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001799 res = element_get_attrib(self);
1800 }
1801
1802 if (!res)
1803 return NULL;
1804
1805 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001806 return res;
1807}
1808
Eli Benderskyb20df952012-05-20 06:33:29 +03001809static PyObject*
1810element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001811{
Eli Benderskyb20df952012-05-20 06:33:29 +03001812 char *name = "";
1813 if (PyUnicode_Check(nameobj))
1814 name = _PyUnicode_AsString(nameobj);
1815
1816 if (name == NULL)
1817 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001818
1819 if (strcmp(name, "tag") == 0) {
1820 Py_DECREF(self->tag);
1821 self->tag = value;
1822 Py_INCREF(self->tag);
1823 } else if (strcmp(name, "text") == 0) {
1824 Py_DECREF(JOIN_OBJ(self->text));
1825 self->text = value;
1826 Py_INCREF(self->text);
1827 } else if (strcmp(name, "tail") == 0) {
1828 Py_DECREF(JOIN_OBJ(self->tail));
1829 self->tail = value;
1830 Py_INCREF(self->tail);
1831 } else if (strcmp(name, "attrib") == 0) {
1832 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001833 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001834 Py_DECREF(self->extra->attrib);
1835 self->extra->attrib = value;
1836 Py_INCREF(self->extra->attrib);
1837 } else {
1838 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001839 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001840 }
1841
Eli Benderskyb20df952012-05-20 06:33:29 +03001842 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001843}
1844
/* Sequence protocol slots for Element: length, item access and item
   assignment; the remaining slots are left empty. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem, /* sq_item */
    0, /* was_sq_slice */
    element_setitem, /* sq_ass_item */
    0, /* was_sq_ass_slice */
};
1854
/* Mapping protocol slots for Element: length, subscript read and
   subscript assignment. */
static PyMappingMethods element_as_mapping = {
    (lenfunc) element_length, /* mp_length */
    (binaryfunc) element_subscr, /* mp_subscript */
    (objobjargproc) element_ass_subscr, /* mp_ass_subscript */
};
1860
/* Type object for Element.  It can be subclassed (Py_TPFLAGS_BASETYPE),
   takes part in cyclic GC and supports weak references through the
   weakreflist member of ElementObject. */
static PyTypeObject Element_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* tp_name, tp_basicsize, tp_itemsize */
    "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
    /* methods */
    (destructor)element_dealloc,                    /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_reserved */
    (reprfunc)element_repr,                         /* tp_repr */
    0,                                              /* tp_as_number */
    &element_as_sequence,                           /* tp_as_sequence */
    &element_as_mapping,                            /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    (getattrofunc)element_getattro,                 /* tp_getattro */
    (setattrofunc)element_setattro,                 /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)element_gc_traverse,              /* tp_traverse */
    (inquiry)element_gc_clear,                      /* tp_clear */
    0,                                              /* tp_richcompare */
    offsetof(ElementObject, weakreflist),           /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    element_methods,                                /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    (initproc)element_init,                         /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    element_new,                                    /* tp_new */
    0,                                              /* tp_free */
};
1902
Eli Bendersky64d11e62012-06-15 07:42:50 +03001903/******************************* Element iterator ****************************/
1904
1905/* ElementIterObject represents the iteration state over an XML element in
1906 * pre-order traversal. To keep track of which sub-element should be returned
1907 * next, a stack of parents is maintained. This is a standard stack-based
1908 * iterative pre-order traversal of a tree.
1909 * The stack is managed using a single-linked list starting at parent_stack.
1910 * Each stack node contains the saved parent to which we should return after
1911 * the current one is exhausted, and the next child to examine in that parent.
1912 */
/* One node of the iterator's parent stack (a singly linked list). */
typedef struct ParentLocator_t {
    ElementObject *parent;          /* element whose children are being
                                       walked; NULL in the bottom sentinel
                                       node created by create_elementiter */
    Py_ssize_t child_index;         /* index of the next child of parent to
                                       visit */
    struct ParentLocator_t *next;   /* node below this one on the stack */
} ParentLocator;
1918
/* Iterator state shared by the element and text iteration modes. */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* top of the parent stack */
    ElementObject *root_element;    /* element the iteration started from */
    PyObject *sought_tag;           /* tag to match; Py_None matches any */
    int root_done;                  /* set once the root has been handled */
    int gettext;                    /* non-zero: yield text/tail objects
                                       instead of elements */
} ElementIterObject;
1927
1928
1929static void
1930elementiter_dealloc(ElementIterObject *it)
1931{
1932 ParentLocator *p = it->parent_stack;
1933 while (p) {
1934 ParentLocator *temp = p;
1935 Py_XDECREF(p->parent);
1936 p = p->next;
1937 PyObject_Free(temp);
1938 }
1939
1940 Py_XDECREF(it->sought_tag);
1941 Py_XDECREF(it->root_element);
1942
1943 PyObject_GC_UnTrack(it);
1944 PyObject_GC_Del(it);
1945}
1946
1947static int
1948elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1949{
1950 ParentLocator *p = it->parent_stack;
1951 while (p) {
1952 Py_VISIT(p->parent);
1953 p = p->next;
1954 }
1955
1956 Py_VISIT(it->root_element);
1957 Py_VISIT(it->sought_tag);
1958 return 0;
1959}
1960
1961/* Helper function for elementiter_next. Add a new parent to the parent stack.
1962 */
1963static ParentLocator *
1964parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1965{
1966 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1967 if (new_node) {
1968 new_node->parent = parent;
1969 Py_INCREF(parent);
1970 new_node->child_index = 0;
1971 new_node->next = stack;
1972 }
1973 return new_node;
1974}
1975
1976static PyObject *
1977elementiter_next(ElementIterObject *it)
1978{
1979 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08001980 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03001981 * A short note on gettext: this function serves both the iter() and
1982 * itertext() methods to avoid code duplication. However, there are a few
1983 * small differences in the way these iterations work. Namely:
1984 * - itertext() only yields text from nodes that have it, and continues
1985 * iterating when a node doesn't have text (so it doesn't return any
1986 * node like iter())
1987 * - itertext() also has to handle tail, after finishing with all the
1988 * children of a node.
1989 */
Eli Bendersky113da642012-06-15 07:52:49 +03001990 ElementObject *cur_parent;
1991 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001992
1993 while (1) {
1994 /* Handle the case reached in the beginning and end of iteration, where
1995 * the parent stack is empty. The root_done flag gives us indication
1996 * whether we've just started iterating (so root_done is 0), in which
1997 * case the root is returned. If root_done is 1 and we're here, the
1998 * iterator is exhausted.
1999 */
2000 if (!it->parent_stack->parent) {
2001 if (it->root_done) {
2002 PyErr_SetNone(PyExc_StopIteration);
2003 return NULL;
2004 } else {
2005 it->parent_stack = parent_stack_push_new(it->parent_stack,
2006 it->root_element);
2007 if (!it->parent_stack) {
2008 PyErr_NoMemory();
2009 return NULL;
2010 }
2011
2012 it->root_done = 1;
2013 if (it->sought_tag == Py_None ||
2014 PyObject_RichCompareBool(it->root_element->tag,
2015 it->sought_tag, Py_EQ) == 1) {
2016 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002017 PyObject *text = element_get_text(it->root_element);
2018 if (!text)
2019 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002020 if (PyObject_IsTrue(text)) {
2021 Py_INCREF(text);
2022 return text;
2023 }
2024 } else {
2025 Py_INCREF(it->root_element);
2026 return (PyObject *)it->root_element;
2027 }
2028 }
2029 }
2030 }
2031
2032 /* See if there are children left to traverse in the current parent. If
2033 * yes, visit the next child. If not, pop the stack and try again.
2034 */
Eli Bendersky113da642012-06-15 07:52:49 +03002035 cur_parent = it->parent_stack->parent;
2036 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002037 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2038 ElementObject *child = (ElementObject *)
2039 cur_parent->extra->children[child_index];
2040 it->parent_stack->child_index++;
2041 it->parent_stack = parent_stack_push_new(it->parent_stack,
2042 child);
2043 if (!it->parent_stack) {
2044 PyErr_NoMemory();
2045 return NULL;
2046 }
2047
2048 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002049 PyObject *text = element_get_text(child);
2050 if (!text)
2051 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002052 if (PyObject_IsTrue(text)) {
2053 Py_INCREF(text);
2054 return text;
2055 }
2056 } else if (it->sought_tag == Py_None ||
2057 PyObject_RichCompareBool(child->tag,
2058 it->sought_tag, Py_EQ) == 1) {
2059 Py_INCREF(child);
2060 return (PyObject *)child;
2061 }
2062 else
2063 continue;
2064 }
2065 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002066 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002067 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002068 if (it->gettext) {
2069 tail = element_get_tail(cur_parent);
2070 if (!tail)
2071 return NULL;
2072 }
2073 else
2074 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002075 Py_XDECREF(it->parent_stack->parent);
2076 PyObject_Free(it->parent_stack);
2077 it->parent_stack = next;
2078
2079 /* Note that extra condition on it->parent_stack->parent here;
2080 * this is because itertext() is supposed to only return *inner*
2081 * text, not text following the element it began iteration with.
2082 */
2083 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2084 Py_INCREF(tail);
2085 return tail;
2086 }
2087 }
2088 }
2089
2090 return NULL;
2091}
2092
2093
/* Type object for the element iterator.  It is its own iterator
   (PyObject_SelfIter) and takes part in cyclic GC; tp_new is empty, and
   instances are created by create_elementiter. */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* Using the module's name since the pure-Python implementation does not
       have such a type. */
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
2137
2138
2139static PyObject *
2140create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2141{
2142 ElementIterObject *it;
2143 PyObject *star = NULL;
2144
2145 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2146 if (!it)
2147 return NULL;
2148 if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
2149 PyObject_GC_Del(it);
2150 return NULL;
2151 }
2152
2153 it->parent_stack->parent = NULL;
2154 it->parent_stack->child_index = 0;
2155 it->parent_stack->next = NULL;
2156
2157 if (PyUnicode_Check(tag))
2158 star = PyUnicode_FromString("*");
2159 else if (PyBytes_Check(tag))
2160 star = PyBytes_FromString("*");
2161
2162 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2163 tag = Py_None;
2164
2165 Py_XDECREF(star);
2166 it->sought_tag = tag;
2167 it->root_done = 0;
2168 it->gettext = gettext;
2169 it->root_element = self;
2170
2171 Py_INCREF(self);
2172 Py_INCREF(tag);
2173
2174 PyObject_GC_Track(it);
2175 return (PyObject *)it;
2176}
2177
2178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002179/* ==================================================================== */
2180/* the tree builder type */
2181
/* State of a TreeBuilder: the tree under construction, the stack of open
   elements, pending character data and optional event recording. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    PyObject *element_factory; /* optional callable invoked as
                                  factory(tag, attrib) to create nodes;
                                  NULL selects create_new_element */

    /* element tracing */
    PyObject *events; /* list of events, or NULL if not collecting */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;
2204
/* forward declaration; the type object is defined after its methods */
static PyTypeObject TreeBuilder_Type;

/* true only for objects whose type is exactly TreeBuilder_Type
   (instances of subclasses do not match) */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002208
2209/* -------------------------------------------------------------------- */
2210/* constructor and destructor */
2211
Eli Bendersky58d548d2012-05-29 15:45:16 +03002212static PyObject *
2213treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002214{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002215 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2216 if (t != NULL) {
2217 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002218
Eli Bendersky58d548d2012-05-29 15:45:16 +03002219 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002220 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002221 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002222 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002223
Eli Bendersky58d548d2012-05-29 15:45:16 +03002224 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002225 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002226 t->stack = PyList_New(20);
2227 if (!t->stack) {
2228 Py_DECREF(t->this);
2229 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002230 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002231 return NULL;
2232 }
2233 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002234
Eli Bendersky58d548d2012-05-29 15:45:16 +03002235 t->events = NULL;
2236 t->start_event_obj = t->end_event_obj = NULL;
2237 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2238 }
2239 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002240}
2241
Eli Bendersky58d548d2012-05-29 15:45:16 +03002242static int
2243treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002244{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002245 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002246 PyObject *element_factory = NULL;
2247 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002248 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002249
2250 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2251 &element_factory)) {
2252 return -1;
2253 }
2254
2255 if (element_factory) {
2256 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002257 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002258 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002259 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002260 }
2261
Eli Bendersky58d548d2012-05-29 15:45:16 +03002262 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002263}
2264
Eli Bendersky48d358b2012-05-30 17:57:50 +03002265static int
2266treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2267{
2268 Py_VISIT(self->root);
2269 Py_VISIT(self->this);
2270 Py_VISIT(self->last);
2271 Py_VISIT(self->data);
2272 Py_VISIT(self->stack);
2273 Py_VISIT(self->element_factory);
2274 return 0;
2275}
2276
/* tp_clear for TreeBuilder: drops every object reference held by the
   builder and leaves the fields NULL.  Also used by treebuilder_dealloc. */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    /* event recording state */
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events);
    /* tree construction state */
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->this);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002293
Eli Bendersky48d358b2012-05-30 17:57:50 +03002294static void
2295treebuilder_dealloc(TreeBuilderObject *self)
2296{
2297 PyObject_GC_UnTrack(self);
2298 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002299 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002300}
2301
2302/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002303/* helpers for handling of arbitrary element-like objects */
2304
2305static int
2306treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2307 PyObject **dest, _Py_Identifier *name)
2308{
2309 if (Element_CheckExact(element)) {
2310 Py_DECREF(JOIN_OBJ(*dest));
2311 *dest = JOIN_SET(data, PyList_CheckExact(data));
2312 return 0;
2313 }
2314 else {
2315 PyObject *joined = list_join(data);
2316 int r;
2317 if (joined == NULL)
2318 return -1;
2319 r = _PyObject_SetAttrId(element, name, joined);
2320 Py_DECREF(joined);
2321 return r;
2322 }
2323}
2324
2325/* These two functions steal a reference to data */
2326static int
2327treebuilder_set_element_text(PyObject *element, PyObject *data)
2328{
2329 _Py_IDENTIFIER(text);
2330 return treebuilder_set_element_text_or_tail(
2331 element, data, &((ElementObject *) element)->text, &PyId_text);
2332}
2333
2334static int
2335treebuilder_set_element_tail(PyObject *element, PyObject *data)
2336{
2337 _Py_IDENTIFIER(tail);
2338 return treebuilder_set_element_text_or_tail(
2339 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2340}
2341
2342static int
2343treebuilder_add_subelement(PyObject *element, PyObject *child)
2344{
2345 _Py_IDENTIFIER(append);
2346 if (Element_CheckExact(element)) {
2347 ElementObject *elem = (ElementObject *) element;
2348 return element_add_subelement(elem, child);
2349 }
2350 else {
2351 PyObject *res;
2352 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2353 if (res == NULL)
2354 return -1;
2355 Py_DECREF(res);
2356 return 0;
2357 }
2358}
2359
2360/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002361/* handlers */
2362
2363LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002364treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2365 PyObject* attrib)
2366{
2367 PyObject* node;
2368 PyObject* this;
2369
2370 if (self->data) {
2371 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002372 if (treebuilder_set_element_text(self->last, self->data))
2373 return NULL;
2374 }
2375 else {
2376 if (treebuilder_set_element_tail(self->last, self->data))
2377 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002378 }
2379 self->data = NULL;
2380 }
2381
Eli Bendersky48d358b2012-05-30 17:57:50 +03002382 if (self->element_factory) {
2383 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2384 } else {
2385 node = create_new_element(tag, attrib);
2386 }
2387 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002388 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002389 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002390
Antoine Pitrouee329312012-10-04 19:53:29 +02002391 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002392
2393 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002394 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002395 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002396 } else {
2397 if (self->root) {
2398 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002399 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002400 "multiple elements on top level"
2401 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002402 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002403 }
2404 Py_INCREF(node);
2405 self->root = node;
2406 }
2407
2408 if (self->index < PyList_GET_SIZE(self->stack)) {
2409 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002410 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002411 Py_INCREF(this);
2412 } else {
2413 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002414 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002415 }
2416 self->index++;
2417
2418 Py_DECREF(this);
2419 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002420 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002421
2422 Py_DECREF(self->last);
2423 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002424 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002425
2426 if (self->start_event_obj) {
2427 PyObject* res;
2428 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002429 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002430 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002431 PyList_Append(self->events, res);
2432 Py_DECREF(res);
2433 } else
2434 PyErr_Clear(); /* FIXME: propagate error */
2435 }
2436
2437 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002438
2439 error:
2440 Py_DECREF(node);
2441 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002442}
2443
2444LOCAL(PyObject*)
2445treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2446{
2447 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002448 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002449 /* ignore calls to data before the first call to start */
2450 Py_RETURN_NONE;
2451 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002452 /* store the first item as is */
2453 Py_INCREF(data); self->data = data;
2454 } else {
2455 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002456 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2457 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002458 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002459 /* expat often generates single character data sections; handle
2460 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002461 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2462 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002463 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002464 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002465 } else if (PyList_CheckExact(self->data)) {
2466 if (PyList_Append(self->data, data) < 0)
2467 return NULL;
2468 } else {
2469 PyObject* list = PyList_New(2);
2470 if (!list)
2471 return NULL;
2472 PyList_SET_ITEM(list, 0, self->data);
2473 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2474 self->data = list;
2475 }
2476 }
2477
2478 Py_RETURN_NONE;
2479}
2480
2481LOCAL(PyObject*)
2482treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2483{
2484 PyObject* item;
2485
2486 if (self->data) {
2487 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002488 if (treebuilder_set_element_text(self->last, self->data))
2489 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002490 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002491 if (treebuilder_set_element_tail(self->last, self->data))
2492 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002493 }
2494 self->data = NULL;
2495 }
2496
2497 if (self->index == 0) {
2498 PyErr_SetString(
2499 PyExc_IndexError,
2500 "pop from empty stack"
2501 );
2502 return NULL;
2503 }
2504
2505 self->index--;
2506
2507 item = PyList_GET_ITEM(self->stack, self->index);
2508 Py_INCREF(item);
2509
2510 Py_DECREF(self->last);
2511
Antoine Pitrouee329312012-10-04 19:53:29 +02002512 self->last = self->this;
2513 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002514
2515 if (self->end_event_obj) {
2516 PyObject* res;
2517 PyObject* action = self->end_event_obj;
2518 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002519 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002520 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521 PyList_Append(self->events, res);
2522 Py_DECREF(res);
2523 } else
2524 PyErr_Clear(); /* FIXME: propagate error */
2525 }
2526
2527 Py_INCREF(self->last);
2528 return (PyObject*) self->last;
2529}
2530
2531LOCAL(void)
2532treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002533 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002534{
2535 PyObject* res;
2536 PyObject* action;
2537 PyObject* parcel;
2538
2539 if (!self->events)
2540 return;
2541
2542 if (start) {
2543 if (!self->start_ns_event_obj)
2544 return;
2545 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002546 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002547 if (!parcel)
2548 return;
2549 Py_INCREF(action);
2550 } else {
2551 if (!self->end_ns_event_obj)
2552 return;
2553 action = self->end_ns_event_obj;
2554 Py_INCREF(action);
2555 parcel = Py_None;
2556 Py_INCREF(parcel);
2557 }
2558
2559 res = PyTuple_New(2);
2560
2561 if (res) {
2562 PyTuple_SET_ITEM(res, 0, action);
2563 PyTuple_SET_ITEM(res, 1, parcel);
2564 PyList_Append(self->events, res);
2565 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002566 }
2567 else {
2568 Py_DECREF(action);
2569 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002570 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002571 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002572}
2573
2574/* -------------------------------------------------------------------- */
2575/* methods (in alphabetical order) */
2576
2577static PyObject*
2578treebuilder_data(TreeBuilderObject* self, PyObject* args)
2579{
2580 PyObject* data;
2581 if (!PyArg_ParseTuple(args, "O:data", &data))
2582 return NULL;
2583
2584 return treebuilder_handle_data(self, data);
2585}
2586
2587static PyObject*
2588treebuilder_end(TreeBuilderObject* self, PyObject* args)
2589{
2590 PyObject* tag;
2591 if (!PyArg_ParseTuple(args, "O:end", &tag))
2592 return NULL;
2593
2594 return treebuilder_handle_end(self, tag);
2595}
2596
2597LOCAL(PyObject*)
2598treebuilder_done(TreeBuilderObject* self)
2599{
2600 PyObject* res;
2601
2602 /* FIXME: check stack size? */
2603
2604 if (self->root)
2605 res = self->root;
2606 else
2607 res = Py_None;
2608
2609 Py_INCREF(res);
2610 return res;
2611}
2612
2613static PyObject*
2614treebuilder_close(TreeBuilderObject* self, PyObject* args)
2615{
2616 if (!PyArg_ParseTuple(args, ":close"))
2617 return NULL;
2618
2619 return treebuilder_done(self);
2620}
2621
2622static PyObject*
2623treebuilder_start(TreeBuilderObject* self, PyObject* args)
2624{
2625 PyObject* tag;
2626 PyObject* attrib = Py_None;
2627 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2628 return NULL;
2629
2630 return treebuilder_handle_start(self, tag, attrib);
2631}
2632
/* Method table for TreeBuilder; every entry takes positional arguments
   only (METH_VARARGS).  The {NULL, NULL} entry terminates the table. */
static PyMethodDef treebuilder_methods[] = {
    {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
    {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
    {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
    {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
    {NULL, NULL}
};
2640
/* Type object for TreeBuilder.  It can be subclassed
   (Py_TPFLAGS_BASETYPE) and takes part in cyclic GC. */
static PyTypeObject TreeBuilder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* tp_name, tp_basicsize, tp_itemsize */
    "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
    /* methods */
    (destructor)treebuilder_dealloc,                /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_reserved */
    0,                                              /* tp_repr */
    0,                                              /* tp_as_number */
    0,                                              /* tp_as_sequence */
    0,                                              /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    0,                                              /* tp_getattro */
    0,                                              /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)treebuilder_gc_traverse,          /* tp_traverse */
    (inquiry)treebuilder_gc_clear,                  /* tp_clear */
    0,                                              /* tp_richcompare */
    0,                                              /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    treebuilder_methods,                            /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    (initproc)treebuilder_init,                     /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    treebuilder_new,                                /* tp_new */
    0,                                              /* tp_free */
};
2682
2683/* ==================================================================== */
2684/* the expat interface */
2685
2686#if defined(USE_EXPAT)
2687
2688#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002689#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002690static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002691#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002692
Eli Bendersky52467b12012-06-01 07:13:08 +03002693static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2694 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2695
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002696typedef struct {
2697 PyObject_HEAD
2698
2699 XML_Parser parser;
2700
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002701 PyObject *target;
2702 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002703
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002704 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002705
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002706 PyObject *handle_start;
2707 PyObject *handle_data;
2708 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002709
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002710 PyObject *handle_comment;
2711 PyObject *handle_pi;
2712 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002713
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002714 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002715
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716} XMLParserObject;
2717
Neal Norwitz227b5332006-03-22 09:28:35 +00002718static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002719
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002720#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2721
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722/* helpers */
2723
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002724LOCAL(PyObject*)
2725makeuniversal(XMLParserObject* self, const char* string)
2726{
2727 /* convert a UTF-8 tag/attribute name from the expat parser
2728 to a universal name string */
2729
Antoine Pitrouc1948842012-10-01 23:40:37 +02002730 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731 PyObject* key;
2732 PyObject* value;
2733
2734 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002735 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736 if (!key)
2737 return NULL;
2738
2739 value = PyDict_GetItem(self->names, key);
2740
2741 if (value) {
2742 Py_INCREF(value);
2743 } else {
2744 /* new name. convert to universal name, and decode as
2745 necessary */
2746
2747 PyObject* tag;
2748 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002749 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002750
2751 /* look for namespace separator */
2752 for (i = 0; i < size; i++)
2753 if (string[i] == '}')
2754 break;
2755 if (i != size) {
2756 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002757 tag = PyBytes_FromStringAndSize(NULL, size+1);
2758 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002759 p[0] = '{';
2760 memcpy(p+1, string, size);
2761 size++;
2762 } else {
2763 /* plain name; use key as tag */
2764 Py_INCREF(key);
2765 tag = key;
2766 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002767
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002768 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002769 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002770 value = PyUnicode_DecodeUTF8(p, size, "strict");
2771 Py_DECREF(tag);
2772 if (!value) {
2773 Py_DECREF(key);
2774 return NULL;
2775 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002776
2777 /* add to names dictionary */
2778 if (PyDict_SetItem(self->names, key, value) < 0) {
2779 Py_DECREF(key);
2780 Py_DECREF(value);
2781 return NULL;
2782 }
2783 }
2784
2785 Py_DECREF(key);
2786 return value;
2787}
2788
Eli Bendersky5b77d812012-03-16 08:20:05 +02002789/* Set the ParseError exception with the given parameters.
2790 * If message is not NULL, it's used as the error string. Otherwise, the
2791 * message string is the default for the given error_code.
2792*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002793static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002794expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002795{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002796 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002797
Victor Stinner499dfcf2011-03-21 13:26:24 +01002798 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002799 message ? message : EXPAT(ErrorString)(error_code),
2800 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002801 if (errmsg == NULL)
2802 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002803
Victor Stinner499dfcf2011-03-21 13:26:24 +01002804 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2805 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002806 if (!error)
2807 return;
2808
Eli Bendersky5b77d812012-03-16 08:20:05 +02002809 /* Add code and position attributes */
2810 code = PyLong_FromLong((long)error_code);
2811 if (!code) {
2812 Py_DECREF(error);
2813 return;
2814 }
2815 if (PyObject_SetAttrString(error, "code", code) == -1) {
2816 Py_DECREF(error);
2817 Py_DECREF(code);
2818 return;
2819 }
2820 Py_DECREF(code);
2821
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002822 position = Py_BuildValue("(ii)", line, column);
2823 if (!position) {
2824 Py_DECREF(error);
2825 return;
2826 }
2827 if (PyObject_SetAttrString(error, "position", position) == -1) {
2828 Py_DECREF(error);
2829 Py_DECREF(position);
2830 return;
2831 }
2832 Py_DECREF(position);
2833
2834 PyErr_SetObject(elementtree_parseerror_obj, error);
2835 Py_DECREF(error);
2836}
2837
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002838/* -------------------------------------------------------------------- */
2839/* handlers */
2840
2841static void
2842expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2843 int data_len)
2844{
2845 PyObject* key;
2846 PyObject* value;
2847 PyObject* res;
2848
2849 if (data_len < 2 || data_in[0] != '&')
2850 return;
2851
Neal Norwitz0269b912007-08-08 06:56:02 +00002852 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002853 if (!key)
2854 return;
2855
2856 value = PyDict_GetItem(self->entity, key);
2857
2858 if (value) {
2859 if (TreeBuilder_CheckExact(self->target))
2860 res = treebuilder_handle_data(
2861 (TreeBuilderObject*) self->target, value
2862 );
2863 else if (self->handle_data)
2864 res = PyObject_CallFunction(self->handle_data, "O", value);
2865 else
2866 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002867 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002868 } else if (!PyErr_Occurred()) {
2869 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002870 char message[128] = "undefined entity ";
2871 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002872 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002873 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002874 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002875 EXPAT(GetErrorColumnNumber)(self->parser),
2876 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002877 );
2878 }
2879
2880 Py_DECREF(key);
2881}
2882
2883static void
2884expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2885 const XML_Char **attrib_in)
2886{
2887 PyObject* res;
2888 PyObject* tag;
2889 PyObject* attrib;
2890 int ok;
2891
2892 /* tag name */
2893 tag = makeuniversal(self, tag_in);
2894 if (!tag)
2895 return; /* parser will look for errors */
2896
2897 /* attributes */
2898 if (attrib_in[0]) {
2899 attrib = PyDict_New();
2900 if (!attrib)
2901 return;
2902 while (attrib_in[0] && attrib_in[1]) {
2903 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002904 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002905 if (!key || !value) {
2906 Py_XDECREF(value);
2907 Py_XDECREF(key);
2908 Py_DECREF(attrib);
2909 return;
2910 }
2911 ok = PyDict_SetItem(attrib, key, value);
2912 Py_DECREF(value);
2913 Py_DECREF(key);
2914 if (ok < 0) {
2915 Py_DECREF(attrib);
2916 return;
2917 }
2918 attrib_in += 2;
2919 }
2920 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002921 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002922 attrib = PyDict_New();
2923 if (!attrib)
2924 return;
2925 }
2926
2927 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002928 /* shortcut */
2929 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2930 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002931 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002932 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002933 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002934 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002935 res = NULL;
2936
2937 Py_DECREF(tag);
2938 Py_DECREF(attrib);
2939
2940 Py_XDECREF(res);
2941}
2942
2943static void
2944expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2945 int data_len)
2946{
2947 PyObject* data;
2948 PyObject* res;
2949
Neal Norwitz0269b912007-08-08 06:56:02 +00002950 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002951 if (!data)
2952 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002953
2954 if (TreeBuilder_CheckExact(self->target))
2955 /* shortcut */
2956 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2957 else if (self->handle_data)
2958 res = PyObject_CallFunction(self->handle_data, "O", data);
2959 else
2960 res = NULL;
2961
2962 Py_DECREF(data);
2963
2964 Py_XDECREF(res);
2965}
2966
2967static void
2968expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2969{
2970 PyObject* tag;
2971 PyObject* res = NULL;
2972
2973 if (TreeBuilder_CheckExact(self->target))
2974 /* shortcut */
2975 /* the standard tree builder doesn't look at the end tag */
2976 res = treebuilder_handle_end(
2977 (TreeBuilderObject*) self->target, Py_None
2978 );
2979 else if (self->handle_end) {
2980 tag = makeuniversal(self, tag_in);
2981 if (tag) {
2982 res = PyObject_CallFunction(self->handle_end, "O", tag);
2983 Py_DECREF(tag);
2984 }
2985 }
2986
2987 Py_XDECREF(res);
2988}
2989
2990static void
2991expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2992 const XML_Char *uri)
2993{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002994 PyObject* sprefix = NULL;
2995 PyObject* suri = NULL;
2996
2997 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2998 if (!suri)
2999 return;
3000
3001 if (prefix)
3002 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3003 else
3004 sprefix = PyUnicode_FromString("");
3005 if (!sprefix) {
3006 Py_DECREF(suri);
3007 return;
3008 }
3009
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003010 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003011 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003012 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003013
3014 Py_DECREF(sprefix);
3015 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016}
3017
3018static void
3019expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3020{
3021 treebuilder_handle_namespace(
3022 (TreeBuilderObject*) self->target, 0, NULL, NULL
3023 );
3024}
3025
3026static void
3027expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3028{
3029 PyObject* comment;
3030 PyObject* res;
3031
3032 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003033 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003034 if (comment) {
3035 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3036 Py_XDECREF(res);
3037 Py_DECREF(comment);
3038 }
3039 }
3040}
3041
Eli Bendersky45839902013-01-13 05:14:47 -08003042static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003043expat_start_doctype_handler(XMLParserObject *self,
3044 const XML_Char *doctype_name,
3045 const XML_Char *sysid,
3046 const XML_Char *pubid,
3047 int has_internal_subset)
3048{
3049 PyObject *self_pyobj = (PyObject *)self;
3050 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3051 PyObject *parser_doctype = NULL;
3052 PyObject *res = NULL;
3053
3054 doctype_name_obj = makeuniversal(self, doctype_name);
3055 if (!doctype_name_obj)
3056 return;
3057
3058 if (sysid) {
3059 sysid_obj = makeuniversal(self, sysid);
3060 if (!sysid_obj) {
3061 Py_DECREF(doctype_name_obj);
3062 return;
3063 }
3064 } else {
3065 Py_INCREF(Py_None);
3066 sysid_obj = Py_None;
3067 }
3068
3069 if (pubid) {
3070 pubid_obj = makeuniversal(self, pubid);
3071 if (!pubid_obj) {
3072 Py_DECREF(doctype_name_obj);
3073 Py_DECREF(sysid_obj);
3074 return;
3075 }
3076 } else {
3077 Py_INCREF(Py_None);
3078 pubid_obj = Py_None;
3079 }
3080
3081 /* If the target has a handler for doctype, call it. */
3082 if (self->handle_doctype) {
3083 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3084 doctype_name_obj, pubid_obj, sysid_obj);
3085 Py_CLEAR(res);
3086 }
3087
3088 /* Now see if the parser itself has a doctype method. If yes and it's
3089 * a subclass, call it but warn about deprecation. If it's not a subclass
3090 * (i.e. vanilla XMLParser), do nothing.
3091 */
3092 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3093 if (parser_doctype) {
3094 if (!XMLParser_CheckExact(self_pyobj)) {
3095 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3096 "This method of XMLParser is deprecated. Define"
3097 " doctype() method on the TreeBuilder target.",
3098 1) < 0) {
3099 goto clear;
3100 }
3101 res = PyObject_CallFunction(parser_doctype, "OOO",
3102 doctype_name_obj, pubid_obj, sysid_obj);
3103 Py_CLEAR(res);
3104 }
3105 }
3106
3107clear:
3108 Py_XDECREF(parser_doctype);
3109 Py_DECREF(doctype_name_obj);
3110 Py_DECREF(pubid_obj);
3111 Py_DECREF(sysid_obj);
3112}
3113
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003114static void
3115expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3116 const XML_Char* data_in)
3117{
3118 PyObject* target;
3119 PyObject* data;
3120 PyObject* res;
3121
3122 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003123 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3124 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003125 if (target && data) {
3126 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3127 Py_XDECREF(res);
3128 Py_DECREF(data);
3129 Py_DECREF(target);
3130 } else {
3131 Py_XDECREF(data);
3132 Py_XDECREF(target);
3133 }
3134 }
3135}
3136
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003137static int
3138expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
3139 XML_Encoding *info)
3140{
3141 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003142 unsigned char s[256];
3143 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003144 void *data;
3145 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003146
3147 memset(info, 0, sizeof(XML_Encoding));
3148
3149 for (i = 0; i < 256; i++)
3150 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003151
Fredrik Lundhc3389992005-12-25 11:40:19 +00003152 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003153 if (!u)
3154 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003155 if (PyUnicode_READY(u))
3156 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003157
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003158 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003159 Py_DECREF(u);
3160 return XML_STATUS_ERROR;
3161 }
3162
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003163 kind = PyUnicode_KIND(u);
3164 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003165 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003166 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
3167 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
3168 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003169 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003170 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003171 }
3172
3173 Py_DECREF(u);
3174
3175 return XML_STATUS_OK;
3176}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003177
3178/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003179
Eli Bendersky52467b12012-06-01 07:13:08 +03003180static PyObject *
3181xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003182{
Eli Bendersky52467b12012-06-01 07:13:08 +03003183 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3184 if (self) {
3185 self->parser = NULL;
3186 self->target = self->entity = self->names = NULL;
3187 self->handle_start = self->handle_data = self->handle_end = NULL;
3188 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003189 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003190 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003191 return (PyObject *)self;
3192}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003193
Eli Bendersky52467b12012-06-01 07:13:08 +03003194static int
3195xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3196{
3197 XMLParserObject *self_xp = (XMLParserObject *)self;
3198 PyObject *target = NULL, *html = NULL;
3199 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003200 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003201
Eli Bendersky52467b12012-06-01 07:13:08 +03003202 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3203 &html, &target, &encoding)) {
3204 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003205 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003206
Eli Bendersky52467b12012-06-01 07:13:08 +03003207 self_xp->entity = PyDict_New();
3208 if (!self_xp->entity)
3209 return -1;
3210
3211 self_xp->names = PyDict_New();
3212 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003213 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003214 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003215 }
3216
Eli Bendersky52467b12012-06-01 07:13:08 +03003217 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3218 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003219 Py_CLEAR(self_xp->entity);
3220 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003221 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003222 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003223 }
3224
Eli Bendersky52467b12012-06-01 07:13:08 +03003225 if (target) {
3226 Py_INCREF(target);
3227 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003228 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003229 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003230 Py_CLEAR(self_xp->entity);
3231 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003232 EXPAT(ParserFree)(self_xp->parser);
3233 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003234 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003235 }
3236 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003237
Eli Bendersky52467b12012-06-01 07:13:08 +03003238 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3239 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3240 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3241 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3242 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3243 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003244 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003245
3246 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003247
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003248 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003249 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003250 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003251 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003252 (XML_StartElementHandler) expat_start_handler,
3253 (XML_EndElementHandler) expat_end_handler
3254 );
3255 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003256 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003257 (XML_DefaultHandler) expat_default_handler
3258 );
3259 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003260 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003261 (XML_CharacterDataHandler) expat_data_handler
3262 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003263 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003264 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003265 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003266 (XML_CommentHandler) expat_comment_handler
3267 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003268 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003269 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003270 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271 (XML_ProcessingInstructionHandler) expat_pi_handler
3272 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003273 EXPAT(SetStartDoctypeDeclHandler)(
3274 self_xp->parser,
3275 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3276 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003277 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003278 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003279 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
3280 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003281
Eli Bendersky52467b12012-06-01 07:13:08 +03003282 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003283}
3284
Eli Bendersky52467b12012-06-01 07:13:08 +03003285static int
3286xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3287{
3288 Py_VISIT(self->handle_close);
3289 Py_VISIT(self->handle_pi);
3290 Py_VISIT(self->handle_comment);
3291 Py_VISIT(self->handle_end);
3292 Py_VISIT(self->handle_data);
3293 Py_VISIT(self->handle_start);
3294
3295 Py_VISIT(self->target);
3296 Py_VISIT(self->entity);
3297 Py_VISIT(self->names);
3298
3299 return 0;
3300}
3301
3302static int
3303xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003304{
3305 EXPAT(ParserFree)(self->parser);
3306
Antoine Pitrouc1948842012-10-01 23:40:37 +02003307 Py_CLEAR(self->handle_close);
3308 Py_CLEAR(self->handle_pi);
3309 Py_CLEAR(self->handle_comment);
3310 Py_CLEAR(self->handle_end);
3311 Py_CLEAR(self->handle_data);
3312 Py_CLEAR(self->handle_start);
3313 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003314
Antoine Pitrouc1948842012-10-01 23:40:37 +02003315 Py_CLEAR(self->target);
3316 Py_CLEAR(self->entity);
3317 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003318
Eli Bendersky52467b12012-06-01 07:13:08 +03003319 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003320}
3321
Eli Bendersky52467b12012-06-01 07:13:08 +03003322static void
3323xmlparser_dealloc(XMLParserObject* self)
3324{
3325 PyObject_GC_UnTrack(self);
3326 xmlparser_gc_clear(self);
3327 Py_TYPE(self)->tp_free((PyObject *)self);
3328}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003329
3330LOCAL(PyObject*)
3331expat_parse(XMLParserObject* self, char* data, int data_len, int final)
3332{
3333 int ok;
3334
3335 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3336
3337 if (PyErr_Occurred())
3338 return NULL;
3339
3340 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003341 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003342 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003343 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003344 EXPAT(GetErrorColumnNumber)(self->parser),
3345 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346 );
3347 return NULL;
3348 }
3349
3350 Py_RETURN_NONE;
3351}
3352
3353static PyObject*
3354xmlparser_close(XMLParserObject* self, PyObject* args)
3355{
3356 /* end feeding data to parser */
3357
3358 PyObject* res;
3359 if (!PyArg_ParseTuple(args, ":close"))
3360 return NULL;
3361
3362 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003363 if (!res)
3364 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003365
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003366 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003367 Py_DECREF(res);
3368 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003369 } if (self->handle_close) {
3370 Py_DECREF(res);
3371 return PyObject_CallFunction(self->handle_close, "");
3372 } else
3373 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003374}
3375
3376static PyObject*
3377xmlparser_feed(XMLParserObject* self, PyObject* args)
3378{
3379 /* feed data to parser */
3380
3381 char* data;
3382 int data_len;
3383 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
3384 return NULL;
3385
3386 return expat_parse(self, data, data_len, 0);
3387}
3388
3389static PyObject*
3390xmlparser_parse(XMLParserObject* self, PyObject* args)
3391{
3392 /* (internal) parse until end of input stream */
3393
3394 PyObject* reader;
3395 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003396 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003397 PyObject* res;
3398
3399 PyObject* fileobj;
3400 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3401 return NULL;
3402
3403 reader = PyObject_GetAttrString(fileobj, "read");
3404 if (!reader)
3405 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003406
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003407 /* read from open file object */
3408 for (;;) {
3409
3410 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3411
3412 if (!buffer) {
3413 /* read failed (e.g. due to KeyboardInterrupt) */
3414 Py_DECREF(reader);
3415 return NULL;
3416 }
3417
Eli Benderskyf996e772012-03-16 05:53:30 +02003418 if (PyUnicode_CheckExact(buffer)) {
3419 /* A unicode object is encoded into bytes using UTF-8 */
3420 if (PyUnicode_GET_SIZE(buffer) == 0) {
3421 Py_DECREF(buffer);
3422 break;
3423 }
3424 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003425 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003426 if (!temp) {
3427 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003428 Py_DECREF(reader);
3429 return NULL;
3430 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003431 buffer = temp;
3432 }
3433 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003434 Py_DECREF(buffer);
3435 break;
3436 }
3437
3438 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003439 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003440 );
3441
3442 Py_DECREF(buffer);
3443
3444 if (!res) {
3445 Py_DECREF(reader);
3446 return NULL;
3447 }
3448 Py_DECREF(res);
3449
3450 }
3451
3452 Py_DECREF(reader);
3453
3454 res = expat_parse(self, "", 0, 1);
3455
3456 if (res && TreeBuilder_CheckExact(self->target)) {
3457 Py_DECREF(res);
3458 return treebuilder_done((TreeBuilderObject*) self->target);
3459 }
3460
3461 return res;
3462}
3463
3464static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003465xmlparser_doctype(XMLParserObject *self, PyObject *args)
3466{
3467 Py_RETURN_NONE;
3468}
3469
3470static PyObject*
3471xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003472{
3473 /* activate element event reporting */
3474
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003475 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003476 TreeBuilderObject* target;
3477
3478 PyObject* events; /* event collector */
3479 PyObject* event_set = Py_None;
3480 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
3481 &event_set))
3482 return NULL;
3483
3484 if (!TreeBuilder_CheckExact(self->target)) {
3485 PyErr_SetString(
3486 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003487 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003488 "targets"
3489 );
3490 return NULL;
3491 }
3492
3493 target = (TreeBuilderObject*) self->target;
3494
3495 Py_INCREF(events);
3496 Py_XDECREF(target->events);
3497 target->events = events;
3498
3499 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003500 Py_CLEAR(target->start_event_obj);
3501 Py_CLEAR(target->end_event_obj);
3502 Py_CLEAR(target->start_ns_event_obj);
3503 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003504
3505 if (event_set == Py_None) {
3506 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003507 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003508 Py_RETURN_NONE;
3509 }
3510
3511 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3512 goto error;
3513
3514 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3515 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3516 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003517 if (PyUnicode_Check(item)) {
3518 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003519 if (event == NULL)
3520 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003521 } else if (PyBytes_Check(item))
3522 event = PyBytes_AS_STRING(item);
3523 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003524 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003525 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003526 if (strcmp(event, "start") == 0) {
3527 Py_INCREF(item);
3528 target->start_event_obj = item;
3529 } else if (strcmp(event, "end") == 0) {
3530 Py_INCREF(item);
3531 Py_XDECREF(target->end_event_obj);
3532 target->end_event_obj = item;
3533 } else if (strcmp(event, "start-ns") == 0) {
3534 Py_INCREF(item);
3535 Py_XDECREF(target->start_ns_event_obj);
3536 target->start_ns_event_obj = item;
3537 EXPAT(SetNamespaceDeclHandler)(
3538 self->parser,
3539 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3540 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3541 );
3542 } else if (strcmp(event, "end-ns") == 0) {
3543 Py_INCREF(item);
3544 Py_XDECREF(target->end_ns_event_obj);
3545 target->end_ns_event_obj = item;
3546 EXPAT(SetNamespaceDeclHandler)(
3547 self->parser,
3548 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3549 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3550 );
3551 } else {
3552 PyErr_Format(
3553 PyExc_ValueError,
3554 "unknown event '%s'", event
3555 );
3556 return NULL;
3557 }
3558 }
3559
3560 Py_RETURN_NONE;
3561
3562 error:
3563 PyErr_SetString(
3564 PyExc_TypeError,
3565 "invalid event tuple"
3566 );
3567 return NULL;
3568}
3569
3570static PyMethodDef xmlparser_methods[] = {
3571 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
3572 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3573 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3574 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003575 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003576 {NULL, NULL}
3577};
3578
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003579static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003580xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003581{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003582 if (PyUnicode_Check(nameobj)) {
3583 PyObject* res;
3584 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3585 res = self->entity;
3586 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3587 res = self->target;
3588 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3589 return PyUnicode_FromFormat(
3590 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003591 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003592 }
3593 else
3594 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003595
Alexander Belopolskye239d232010-12-08 23:31:48 +00003596 Py_INCREF(res);
3597 return res;
3598 }
3599 generic:
3600 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003601}
3602
Neal Norwitz227b5332006-03-22 09:28:35 +00003603static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003604 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003605 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003606 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003607 (destructor)xmlparser_dealloc, /* tp_dealloc */
3608 0, /* tp_print */
3609 0, /* tp_getattr */
3610 0, /* tp_setattr */
3611 0, /* tp_reserved */
3612 0, /* tp_repr */
3613 0, /* tp_as_number */
3614 0, /* tp_as_sequence */
3615 0, /* tp_as_mapping */
3616 0, /* tp_hash */
3617 0, /* tp_call */
3618 0, /* tp_str */
3619 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3620 0, /* tp_setattro */
3621 0, /* tp_as_buffer */
3622 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3623 /* tp_flags */
3624 0, /* tp_doc */
3625 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3626 (inquiry)xmlparser_gc_clear, /* tp_clear */
3627 0, /* tp_richcompare */
3628 0, /* tp_weaklistoffset */
3629 0, /* tp_iter */
3630 0, /* tp_iternext */
3631 xmlparser_methods, /* tp_methods */
3632 0, /* tp_members */
3633 0, /* tp_getset */
3634 0, /* tp_base */
3635 0, /* tp_dict */
3636 0, /* tp_descr_get */
3637 0, /* tp_descr_set */
3638 0, /* tp_dictoffset */
3639 (initproc)xmlparser_init, /* tp_init */
3640 PyType_GenericAlloc, /* tp_alloc */
3641 xmlparser_new, /* tp_new */
3642 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003643};
3644
3645#endif
3646
3647/* ==================================================================== */
3648/* python module interface */
3649
3650static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003651 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003652 {NULL, NULL}
3653};
3654
Martin v. Löwis1a214512008-06-11 05:26:20 +00003655
3656static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003657 PyModuleDef_HEAD_INIT,
3658 "_elementtree",
3659 NULL,
3660 -1,
3661 _functions,
3662 NULL,
3663 NULL,
3664 NULL,
3665 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003666};
3667
Neal Norwitzf6657e62006-12-28 04:47:50 +00003668PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003669PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003670{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003671 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003672
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003673 /* Initialize object types */
3674 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003675 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003676 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003677 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003678#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003679 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003680 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681#endif
3682
Martin v. Löwis1a214512008-06-11 05:26:20 +00003683 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003684 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003685 return NULL;
3686
Eli Bendersky828efde2012-04-05 05:40:58 +03003687 if (!(temp = PyImport_ImportModule("copy")))
3688 return NULL;
3689 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3690 Py_XDECREF(temp);
3691
3692 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3693 return NULL;
3694
Eli Bendersky20d41742012-06-01 09:48:37 +03003695 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003696 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3697 if (expat_capi) {
3698 /* check that it's usable */
3699 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3700 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3701 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3702 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003703 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003704 PyErr_SetString(PyExc_ImportError,
3705 "pyexpat version is incompatible");
3706 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003707 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003708 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003709 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003710 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003711
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003712 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003713 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003714 );
3715 Py_INCREF(elementtree_parseerror_obj);
3716 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3717
Eli Bendersky092af1f2012-03-04 07:14:03 +02003718 Py_INCREF((PyObject *)&Element_Type);
3719 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3720
Eli Bendersky58d548d2012-05-29 15:45:16 +03003721 Py_INCREF((PyObject *)&TreeBuilder_Type);
3722 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3723
Eli Bendersky52467b12012-06-01 07:13:08 +03003724#if defined(USE_EXPAT)
3725 Py_INCREF((PyObject *)&XMLParser_Type);
3726 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3727#endif
3728
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003729 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003730}