blob: 8cc98031ca1d15248e8e9837192a9b5120551a7a [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000053#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054
55/* -------------------------------------------------------------------- */
56/* configuration */
57
58/* Leave defined to include the expat-based XMLParser type */
59#define USE_EXPAT
60
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* An element can hold this many children without extra memory
62 allocations. */
63#define STATIC_CHILDREN 4
64
/* For best performance, choose a value so that 80-90% of all nodes
66 have no more than the given number of children. Set this to zero
67 to minimize the size of the element structure itself (this only
68 helps if you have lots of leaf nodes with attributes). */
69
70/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010071 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000072 that the number of children should be an even number, at least on
73 32-bit platforms. */
74
75/* -------------------------------------------------------------------- */
76
/* Optional allocation tracing.  Change the condition below to a non-zero
   value to keep a running byte count in 'memory' and print it on every
   ALLOC and RELEASE; as shipped, both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
87
/* compiler tweaks: LOCAL(type) introduces a file-private helper that
   returns 'type'.  Under Microsoft's compiler the helper is additionally
   declared __inline and __fastcall. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
94
/* The text and tail slots of an element are tagged pointers: bit 0 of
   the stored value is the 'join' flag, the remaining bits are the object
   pointer.  Every use of text or tail as an object pointer must go
   through JOIN_OBJ.  See the comments in the ElementObject definition
   for what the flag means. */

/* strip the flag and recover the plain object pointer */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
/* read the join flag (0 or 1) */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
/* combine the object behind p (tagged or not) with the given flag */
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000102
103/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000104static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000105static PyObject* elementtree_deepcopy_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000106static PyObject* elementpath_obj;
107
108/* helpers */
109
110LOCAL(PyObject*)
111deepcopy(PyObject* object, PyObject* memo)
112{
113 /* do a deep copy of the given object */
114
115 PyObject* args;
116 PyObject* result;
117
118 if (!elementtree_deepcopy_obj) {
119 PyErr_SetString(
120 PyExc_RuntimeError,
121 "deepcopy helper not found"
122 );
123 return NULL;
124 }
125
Antoine Pitrouc1948842012-10-01 23:40:37 +0200126 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000127 if (!args)
128 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000129 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000131 return result;
132}
133
134LOCAL(PyObject*)
135list_join(PyObject* list)
136{
137 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 PyObject* result;
140
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 if (!joiner)
143 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200144 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200146 if (result)
147 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 return result;
149}
150
Eli Bendersky48d358b2012-05-30 17:57:50 +0300151/* Is the given object an empty dictionary?
152*/
153static int
154is_empty_dict(PyObject *obj)
155{
156 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
157}
158
159
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200161/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162
163typedef struct {
164
165 /* attributes (a dictionary object), or None if no attributes */
166 PyObject* attrib;
167
168 /* child elements */
169 int length; /* actual number of items */
170 int allocated; /* allocated items */
171
172 /* this either points to _children or to a malloced buffer */
173 PyObject* *children;
174
175 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100176
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000177} ElementObjectExtra;
178
179typedef struct {
180 PyObject_HEAD
181
182 /* element tag (a string). */
183 PyObject* tag;
184
185 /* text before first child. note that this is a tagged pointer;
186 use JOIN_OBJ to get the object pointer. the join flag is used
187 to distinguish lists created by the tree builder from lists
188 assigned to the attribute by application code; the former
189 should be joined before being returned to the user, the latter
190 should be left intact. */
191 PyObject* text;
192
193 /* text after this element, in parent. note that this is a tagged
194 pointer; use JOIN_OBJ to get the object pointer. */
195 PyObject* tail;
196
197 ElementObjectExtra* extra;
198
Eli Benderskyebf37a22012-04-03 22:02:37 +0300199 PyObject *weakreflist; /* For tp_weaklistoffset */
200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000201} ElementObject;
202
Neal Norwitz227b5332006-03-22 09:28:35 +0000203static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000204
Christian Heimes90aa7642007-12-19 02:45:37 +0000205#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000206
207/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200208/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212{
213 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
214 if (!self->extra)
215 return -1;
216
217 if (!attrib)
218 attrib = Py_None;
219
220 Py_INCREF(attrib);
221 self->extra->attrib = attrib;
222
223 self->extra->length = 0;
224 self->extra->allocated = STATIC_CHILDREN;
225 self->extra->children = self->extra->_children;
226
227 return 0;
228}
229
230LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
Eli Bendersky08b85292012-04-04 15:55:07 +0300233 ElementObjectExtra *myextra;
234 int i;
235
Eli Benderskyebf37a22012-04-03 22:02:37 +0300236 if (!self->extra)
237 return;
238
239 /* Avoid DECREFs calling into this code again (cycles, etc.)
240 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300241 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300242 self->extra = NULL;
243
244 Py_DECREF(myextra->attrib);
245
Eli Benderskyebf37a22012-04-03 22:02:37 +0300246 for (i = 0; i < myextra->length; i++)
247 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000248
Eli Benderskyebf37a22012-04-03 22:02:37 +0300249 if (myextra->children != myextra->_children)
250 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251
Eli Benderskyebf37a22012-04-03 22:02:37 +0300252 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000253}
254
Eli Bendersky092af1f2012-03-04 07:14:03 +0200255/* Convenience internal function to create new Element objects with the given
256 * tag and attributes.
257*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200259create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000260{
261 ElementObject* self;
262
Eli Bendersky0192ba32012-03-30 16:38:33 +0300263 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 if (self == NULL)
265 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 self->extra = NULL;
267
Eli Bendersky48d358b2012-05-30 17:57:50 +0300268 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200269 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000270 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000272 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273 }
274
275 Py_INCREF(tag);
276 self->tag = tag;
277
278 Py_INCREF(Py_None);
279 self->text = Py_None;
280
281 Py_INCREF(Py_None);
282 self->tail = Py_None;
283
Eli Benderskyebf37a22012-04-03 22:02:37 +0300284 self->weakreflist = NULL;
285
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300287 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 return (PyObject*) self;
289}
290
Eli Bendersky092af1f2012-03-04 07:14:03 +0200291static PyObject *
292element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
293{
294 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
295 if (e != NULL) {
296 Py_INCREF(Py_None);
297 e->tag = Py_None;
298
299 Py_INCREF(Py_None);
300 e->text = Py_None;
301
302 Py_INCREF(Py_None);
303 e->tail = Py_None;
304
305 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300306 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307 }
308 return (PyObject *)e;
309}
310
Eli Bendersky737b1732012-05-29 06:02:56 +0300311/* Helper function for extracting the attrib dictionary from a keywords dict.
312 * This is required by some constructors/functions in this module that can
313 * either accept attrib as a keyword argument or all attributes splashed
314 * directly into *kwds.
315 * If there is no 'attrib' keyword, return an empty dict.
316 */
317static PyObject*
318get_attrib_from_keywords(PyObject *kwds)
319{
320 PyObject *attrib_str = PyUnicode_FromString("attrib");
321 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
322
323 if (attrib) {
324 /* If attrib was found in kwds, copy its value and remove it from
325 * kwds
326 */
327 if (!PyDict_Check(attrib)) {
328 Py_DECREF(attrib_str);
329 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
330 Py_TYPE(attrib)->tp_name);
331 return NULL;
332 }
333 attrib = PyDict_Copy(attrib);
334 PyDict_DelItem(kwds, attrib_str);
335 } else {
336 attrib = PyDict_New();
337 }
338
339 Py_DECREF(attrib_str);
340
341 if (attrib)
342 PyDict_Update(attrib, kwds);
343 return attrib;
344}
345
Eli Bendersky092af1f2012-03-04 07:14:03 +0200346static int
347element_init(PyObject *self, PyObject *args, PyObject *kwds)
348{
349 PyObject *tag;
350 PyObject *tmp;
351 PyObject *attrib = NULL;
352 ElementObject *self_elem;
353
354 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
355 return -1;
356
Eli Bendersky737b1732012-05-29 06:02:56 +0300357 if (attrib) {
358 /* attrib passed as positional arg */
359 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200360 if (!attrib)
361 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300362 if (kwds) {
363 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200364 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300365 return -1;
366 }
367 }
368 } else if (kwds) {
369 /* have keywords args */
370 attrib = get_attrib_from_keywords(kwds);
371 if (!attrib)
372 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200373 }
374
375 self_elem = (ElementObject *)self;
376
Antoine Pitrouc1948842012-10-01 23:40:37 +0200377 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200378 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200379 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200380 return -1;
381 }
382 }
383
Eli Bendersky48d358b2012-05-30 17:57:50 +0300384 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200385 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200386
387 /* Replace the objects already pointed to by tag, text and tail. */
388 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 Py_DECREF(tmp);
392
393 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200394 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200395 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200396 Py_DECREF(JOIN_OBJ(tmp));
397
398 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200400 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401 Py_DECREF(JOIN_OBJ(tmp));
402
403 return 0;
404}
405
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000406LOCAL(int)
407element_resize(ElementObject* self, int extra)
408{
409 int size;
410 PyObject* *children;
411
412 /* make sure self->children can hold the given number of extra
413 elements. set an exception and return -1 if allocation failed */
414
415 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417
418 size = self->extra->length + extra;
419
420 if (size > self->extra->allocated) {
421 /* use Python 2.4's list growth strategy */
422 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000423 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100424 * which needs at least 4 bytes.
425 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000426 * be safe.
427 */
428 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000429 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000430 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100431 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000432 * false alarm always assume at least one child to be safe.
433 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000434 children = PyObject_Realloc(self->extra->children,
435 size * sizeof(PyObject*));
436 if (!children)
437 goto nomemory;
438 } else {
439 children = PyObject_Malloc(size * sizeof(PyObject*));
440 if (!children)
441 goto nomemory;
442 /* copy existing children from static area to malloc buffer */
443 memcpy(children, self->extra->children,
444 self->extra->length * sizeof(PyObject*));
445 }
446 self->extra->children = children;
447 self->extra->allocated = size;
448 }
449
450 return 0;
451
452 nomemory:
453 PyErr_NoMemory();
454 return -1;
455}
456
457LOCAL(int)
458element_add_subelement(ElementObject* self, PyObject* element)
459{
460 /* add a child element to a parent */
461
462 if (element_resize(self, 1) < 0)
463 return -1;
464
465 Py_INCREF(element);
466 self->extra->children[self->extra->length] = element;
467
468 self->extra->length++;
469
470 return 0;
471}
472
473LOCAL(PyObject*)
474element_get_attrib(ElementObject* self)
475{
476 /* return borrowed reference to attrib dictionary */
477 /* note: this function assumes that the extra section exists */
478
479 PyObject* res = self->extra->attrib;
480
481 if (res == Py_None) {
482 /* create missing dictionary */
483 res = PyDict_New();
484 if (!res)
485 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200486 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000487 self->extra->attrib = res;
488 }
489
490 return res;
491}
492
493LOCAL(PyObject*)
494element_get_text(ElementObject* self)
495{
496 /* return borrowed reference to text attribute */
497
498 PyObject* res = self->text;
499
500 if (JOIN_GET(res)) {
501 res = JOIN_OBJ(res);
502 if (PyList_CheckExact(res)) {
503 res = list_join(res);
504 if (!res)
505 return NULL;
506 self->text = res;
507 }
508 }
509
510 return res;
511}
512
513LOCAL(PyObject*)
514element_get_tail(ElementObject* self)
515{
516 /* return borrowed reference to text attribute */
517
518 PyObject* res = self->tail;
519
520 if (JOIN_GET(res)) {
521 res = JOIN_OBJ(res);
522 if (PyList_CheckExact(res)) {
523 res = list_join(res);
524 if (!res)
525 return NULL;
526 self->tail = res;
527 }
528 }
529
530 return res;
531}
532
533static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300534subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000535{
536 PyObject* elem;
537
538 ElementObject* parent;
539 PyObject* tag;
540 PyObject* attrib = NULL;
541 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
542 &Element_Type, &parent, &tag,
543 &PyDict_Type, &attrib))
544 return NULL;
545
Eli Bendersky737b1732012-05-29 06:02:56 +0300546 if (attrib) {
547 /* attrib passed as positional arg */
548 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000549 if (!attrib)
550 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300551 if (kwds) {
552 if (PyDict_Update(attrib, kwds) < 0) {
553 return NULL;
554 }
555 }
556 } else if (kwds) {
557 /* have keyword args */
558 attrib = get_attrib_from_keywords(kwds);
559 if (!attrib)
560 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300562 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 Py_INCREF(Py_None);
564 attrib = Py_None;
565 }
566
Eli Bendersky092af1f2012-03-04 07:14:03 +0200567 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000568
569 Py_DECREF(attrib);
570
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000571 if (element_add_subelement(parent, elem) < 0) {
572 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000574 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000575
576 return elem;
577}
578
Eli Bendersky0192ba32012-03-30 16:38:33 +0300579static int
580element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
581{
582 Py_VISIT(self->tag);
583 Py_VISIT(JOIN_OBJ(self->text));
584 Py_VISIT(JOIN_OBJ(self->tail));
585
586 if (self->extra) {
587 int i;
588 Py_VISIT(self->extra->attrib);
589
590 for (i = 0; i < self->extra->length; ++i)
591 Py_VISIT(self->extra->children[i]);
592 }
593 return 0;
594}
595
596static int
597element_gc_clear(ElementObject *self)
598{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300599 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300600
601 /* The following is like Py_CLEAR for self->text and self->tail, but
602 * written explicitily because the real pointers hide behind access
603 * macros.
604 */
605 if (self->text) {
606 PyObject *tmp = JOIN_OBJ(self->text);
607 self->text = NULL;
608 Py_DECREF(tmp);
609 }
610
611 if (self->tail) {
612 PyObject *tmp = JOIN_OBJ(self->tail);
613 self->tail = NULL;
614 Py_DECREF(tmp);
615 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616
617 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300618 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300619 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300620 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300621 return 0;
622}
623
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624static void
625element_dealloc(ElementObject* self)
626{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300627 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300628
629 if (self->weakreflist != NULL)
630 PyObject_ClearWeakRefs((PyObject *) self);
631
Eli Bendersky0192ba32012-03-30 16:38:33 +0300632 /* element_gc_clear clears all references and deallocates extra
633 */
634 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000635
636 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200637 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000638}
639
640/* -------------------------------------------------------------------- */
641/* methods (in alphabetical order) */
642
643static PyObject*
644element_append(ElementObject* self, PyObject* args)
645{
646 PyObject* element;
647 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
648 return NULL;
649
650 if (element_add_subelement(self, element) < 0)
651 return NULL;
652
653 Py_RETURN_NONE;
654}
655
656static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300657element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000658{
659 if (!PyArg_ParseTuple(args, ":clear"))
660 return NULL;
661
Eli Benderskyebf37a22012-04-03 22:02:37 +0300662 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663
664 Py_INCREF(Py_None);
665 Py_DECREF(JOIN_OBJ(self->text));
666 self->text = Py_None;
667
668 Py_INCREF(Py_None);
669 Py_DECREF(JOIN_OBJ(self->tail));
670 self->tail = Py_None;
671
672 Py_RETURN_NONE;
673}
674
675static PyObject*
676element_copy(ElementObject* self, PyObject* args)
677{
678 int i;
679 ElementObject* element;
680
681 if (!PyArg_ParseTuple(args, ":__copy__"))
682 return NULL;
683
Eli Bendersky092af1f2012-03-04 07:14:03 +0200684 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000685 self->tag, (self->extra) ? self->extra->attrib : Py_None
686 );
687 if (!element)
688 return NULL;
689
690 Py_DECREF(JOIN_OBJ(element->text));
691 element->text = self->text;
692 Py_INCREF(JOIN_OBJ(element->text));
693
694 Py_DECREF(JOIN_OBJ(element->tail));
695 element->tail = self->tail;
696 Py_INCREF(JOIN_OBJ(element->tail));
697
698 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100699
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000700 if (element_resize(element, self->extra->length) < 0) {
701 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000702 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000703 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704
705 for (i = 0; i < self->extra->length; i++) {
706 Py_INCREF(self->extra->children[i]);
707 element->extra->children[i] = self->extra->children[i];
708 }
709
710 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100711
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000712 }
713
714 return (PyObject*) element;
715}
716
717static PyObject*
718element_deepcopy(ElementObject* self, PyObject* args)
719{
720 int i;
721 ElementObject* element;
722 PyObject* tag;
723 PyObject* attrib;
724 PyObject* text;
725 PyObject* tail;
726 PyObject* id;
727
728 PyObject* memo;
729 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
730 return NULL;
731
732 tag = deepcopy(self->tag, memo);
733 if (!tag)
734 return NULL;
735
736 if (self->extra) {
737 attrib = deepcopy(self->extra->attrib, memo);
738 if (!attrib) {
739 Py_DECREF(tag);
740 return NULL;
741 }
742 } else {
743 Py_INCREF(Py_None);
744 attrib = Py_None;
745 }
746
Eli Bendersky092af1f2012-03-04 07:14:03 +0200747 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
749 Py_DECREF(tag);
750 Py_DECREF(attrib);
751
752 if (!element)
753 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100754
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755 text = deepcopy(JOIN_OBJ(self->text), memo);
756 if (!text)
757 goto error;
758 Py_DECREF(element->text);
759 element->text = JOIN_SET(text, JOIN_GET(self->text));
760
761 tail = deepcopy(JOIN_OBJ(self->tail), memo);
762 if (!tail)
763 goto error;
764 Py_DECREF(element->tail);
765 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
766
767 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100768
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000769 if (element_resize(element, self->extra->length) < 0)
770 goto error;
771
772 for (i = 0; i < self->extra->length; i++) {
773 PyObject* child = deepcopy(self->extra->children[i], memo);
774 if (!child) {
775 element->extra->length = i;
776 goto error;
777 }
778 element->extra->children[i] = child;
779 }
780
781 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100782
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000783 }
784
785 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200786 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000787 if (!id)
788 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789
790 i = PyDict_SetItem(memo, id, (PyObject*) element);
791
792 Py_DECREF(id);
793
794 if (i < 0)
795 goto error;
796
797 return (PyObject*) element;
798
799 error:
800 Py_DECREF(element);
801 return NULL;
802}
803
Martin v. Löwisbce16662012-06-17 10:41:22 +0200804static PyObject*
805element_sizeof(PyObject* _self, PyObject* args)
806{
807 ElementObject *self = (ElementObject*)_self;
808 Py_ssize_t result = sizeof(ElementObject);
809 if (self->extra) {
810 result += sizeof(ElementObjectExtra);
811 if (self->extra->children != self->extra->_children)
812 result += sizeof(PyObject*) * self->extra->allocated;
813 }
814 return PyLong_FromSsize_t(result);
815}
816
/* keys of the state dictionary exchanged by getstate/setstate */
#define PICKLED_TAG       "tag"
#define PICKLED_CHILDREN  "_children"
#define PICKLED_ATTRIB    "attrib"
#define PICKLED_TAIL      "tail"
#define PICKLED_TEXT      "text"
823
824/* __getstate__ returns a fabricated instance dict as in the pure-Python
825 * Element implementation, for interoperability/interchangeability. This
826 * makes the pure-Python implementation details an API, but (a) there aren't
827 * any unnecessary structures there; and (b) it buys compatibility with 3.2
828 * pickles. See issue #16076.
829 */
830static PyObject *
831element_getstate(ElementObject *self)
832{
833 int i, noattrib;
834 PyObject *instancedict = NULL, *children;
835
836 /* Build a list of children. */
837 children = PyList_New(self->extra ? self->extra->length : 0);
838 if (!children)
839 return NULL;
840 for (i = 0; i < PyList_GET_SIZE(children); i++) {
841 PyObject *child = self->extra->children[i];
842 Py_INCREF(child);
843 PyList_SET_ITEM(children, i, child);
844 }
845
846 /* Construct the state object. */
847 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
848 if (noattrib)
849 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
850 PICKLED_TAG, self->tag,
851 PICKLED_CHILDREN, children,
852 PICKLED_ATTRIB,
853 PICKLED_TEXT, self->text,
854 PICKLED_TAIL, self->tail);
855 else
856 instancedict = Py_BuildValue("{sOsOsOsOsO}",
857 PICKLED_TAG, self->tag,
858 PICKLED_CHILDREN, children,
859 PICKLED_ATTRIB, self->extra->attrib,
860 PICKLED_TEXT, self->text,
861 PICKLED_TAIL, self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800862 if (instancedict) {
863 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800864 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800865 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800866 else {
867 for (i = 0; i < PyList_GET_SIZE(children); i++)
868 Py_DECREF(PyList_GET_ITEM(children, i));
869 Py_DECREF(children);
870
871 return NULL;
872 }
873}
874
875static PyObject *
876element_setstate_from_attributes(ElementObject *self,
877 PyObject *tag,
878 PyObject *attrib,
879 PyObject *text,
880 PyObject *tail,
881 PyObject *children)
882{
883 Py_ssize_t i, nchildren;
884
885 if (!tag) {
886 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
887 return NULL;
888 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800889
890 Py_CLEAR(self->tag);
891 self->tag = tag;
892 Py_INCREF(self->tag);
893
894 Py_CLEAR(self->text);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800895 self->text = text ? text : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800896 Py_INCREF(self->text);
897
898 Py_CLEAR(self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800899 self->tail = tail ? tail : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800900 Py_INCREF(self->tail);
901
902 /* Handle ATTRIB and CHILDREN. */
903 if (!children && !attrib)
904 Py_RETURN_NONE;
905
906 /* Compute 'nchildren'. */
907 if (children) {
908 if (!PyList_Check(children)) {
909 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
910 return NULL;
911 }
912 nchildren = PyList_Size(children);
913 }
914 else {
915 nchildren = 0;
916 }
917
918 /* Allocate 'extra'. */
919 if (element_resize(self, nchildren)) {
920 return NULL;
921 }
922 assert(self->extra && self->extra->allocated >= nchildren);
923
924 /* Copy children */
925 for (i = 0; i < nchildren; i++) {
926 self->extra->children[i] = PyList_GET_ITEM(children, i);
927 Py_INCREF(self->extra->children[i]);
928 }
929
930 self->extra->length = nchildren;
931 self->extra->allocated = nchildren;
932
933 /* Stash attrib. */
934 if (attrib) {
935 Py_CLEAR(self->extra->attrib);
936 self->extra->attrib = attrib;
937 Py_INCREF(attrib);
938 }
939
940 Py_RETURN_NONE;
941}
942
943/* __setstate__ for Element instance from the Python implementation.
944 * 'state' should be the instance dict.
945 */
946static PyObject *
947element_setstate_from_Python(ElementObject *self, PyObject *state)
948{
949 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
950 PICKLED_TAIL, PICKLED_CHILDREN, 0};
951 PyObject *args;
952 PyObject *tag, *attrib, *text, *tail, *children;
953 int error;
954
955 /* More instance dict members than we know to handle? */
956 tag = attrib = text = tail = children = NULL;
957 args = PyTuple_New(0);
958 error = ! PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
959 &attrib, &text, &tail, &children);
960 Py_DECREF(args);
961 if (error)
962 return NULL;
963 else
964 return element_setstate_from_attributes(self, tag, attrib, text,
965 tail, children);
966}
967
968static PyObject *
969element_setstate(ElementObject *self, PyObject *state)
970{
971 if (!PyDict_CheckExact(state)) {
972 PyErr_Format(PyExc_TypeError,
973 "Don't know how to unpickle \"%.200R\" as an Element",
974 state);
975 return NULL;
976 }
977 else
978 return element_setstate_from_Python(self, state);
979}
980
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000981LOCAL(int)
982checkpath(PyObject* tag)
983{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000984 Py_ssize_t i;
985 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000986
987 /* check if a tag contains an xpath character */
988
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000989#define PATHCHAR(ch) \
990 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000991
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000992 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200993 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
994 void *data = PyUnicode_DATA(tag);
995 unsigned int kind = PyUnicode_KIND(tag);
996 for (i = 0; i < len; i++) {
997 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
998 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000999 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001000 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001001 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001002 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001003 return 1;
1004 }
1005 return 0;
1006 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001007 if (PyBytes_Check(tag)) {
1008 char *p = PyBytes_AS_STRING(tag);
1009 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001010 if (p[i] == '{')
1011 check = 0;
1012 else if (p[i] == '}')
1013 check = 1;
1014 else if (check && PATHCHAR(p[i]))
1015 return 1;
1016 }
1017 return 0;
1018 }
1019
1020 return 1; /* unknown type; might be path expression */
1021}
1022
1023static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001024element_extend(ElementObject* self, PyObject* args)
1025{
1026 PyObject* seq;
1027 Py_ssize_t i, seqlen = 0;
1028
1029 PyObject* seq_in;
1030 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1031 return NULL;
1032
1033 seq = PySequence_Fast(seq_in, "");
1034 if (!seq) {
1035 PyErr_Format(
1036 PyExc_TypeError,
1037 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1038 );
1039 return NULL;
1040 }
1041
1042 seqlen = PySequence_Size(seq);
1043 for (i = 0; i < seqlen; i++) {
1044 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001045 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1046 Py_DECREF(seq);
1047 PyErr_Format(
1048 PyExc_TypeError,
1049 "expected an Element, not \"%.200s\"",
1050 Py_TYPE(element)->tp_name);
1051 return NULL;
1052 }
1053
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001054 if (element_add_subelement(self, element) < 0) {
1055 Py_DECREF(seq);
1056 return NULL;
1057 }
1058 }
1059
1060 Py_DECREF(seq);
1061
1062 Py_RETURN_NONE;
1063}
1064
1065static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001066element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001067{
1068 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001069 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001070 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001071 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001072
Eli Bendersky737b1732012-05-29 06:02:56 +03001073 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1074 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001075 return NULL;
1076
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001077 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001078 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001079 return _PyObject_CallMethodId(
1080 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001082 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001083
1084 if (!self->extra)
1085 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001086
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001087 for (i = 0; i < self->extra->length; i++) {
1088 PyObject* item = self->extra->children[i];
1089 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001090 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001091 Py_INCREF(item);
1092 return item;
1093 }
1094 }
1095
1096 Py_RETURN_NONE;
1097}
1098
1099static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001100element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001101{
1102 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001103 PyObject* tag;
1104 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001105 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001106 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001107 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001108
Eli Bendersky737b1732012-05-29 06:02:56 +03001109 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1110 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001111 return NULL;
1112
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001113 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001114 return _PyObject_CallMethodId(
1115 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001116 );
1117
1118 if (!self->extra) {
1119 Py_INCREF(default_value);
1120 return default_value;
1121 }
1122
1123 for (i = 0; i < self->extra->length; i++) {
1124 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +00001125 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
1126
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001127 PyObject* text = element_get_text(item);
1128 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +00001129 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001130 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001131 return text;
1132 }
1133 }
1134
1135 Py_INCREF(default_value);
1136 return default_value;
1137}
1138
1139static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001140element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001141{
1142 int i;
1143 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001144 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001145 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001146 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001147
Eli Bendersky737b1732012-05-29 06:02:56 +03001148 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1149 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001150 return NULL;
1151
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001152 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001153 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001154 return _PyObject_CallMethodId(
1155 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001156 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001157 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001158
1159 out = PyList_New(0);
1160 if (!out)
1161 return NULL;
1162
1163 if (!self->extra)
1164 return out;
1165
1166 for (i = 0; i < self->extra->length; i++) {
1167 PyObject* item = self->extra->children[i];
1168 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001169 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001170 if (PyList_Append(out, item) < 0) {
1171 Py_DECREF(out);
1172 return NULL;
1173 }
1174 }
1175 }
1176
1177 return out;
1178}
1179
1180static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001181element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001182{
1183 PyObject* tag;
1184 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001185 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001186 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001187
Eli Bendersky737b1732012-05-29 06:02:56 +03001188 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1189 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001190 return NULL;
1191
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001192 return _PyObject_CallMethodId(
1193 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001194 );
1195}
1196
1197static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001198element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001199{
1200 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001201 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001202
1203 PyObject* key;
1204 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001205
1206 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1207 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001208 return NULL;
1209
1210 if (!self->extra || self->extra->attrib == Py_None)
1211 value = default_value;
1212 else {
1213 value = PyDict_GetItem(self->extra->attrib, key);
1214 if (!value)
1215 value = default_value;
1216 }
1217
1218 Py_INCREF(value);
1219 return value;
1220}
1221
1222static PyObject*
1223element_getchildren(ElementObject* self, PyObject* args)
1224{
1225 int i;
1226 PyObject* list;
1227
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001228 /* FIXME: report as deprecated? */
1229
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001230 if (!PyArg_ParseTuple(args, ":getchildren"))
1231 return NULL;
1232
1233 if (!self->extra)
1234 return PyList_New(0);
1235
1236 list = PyList_New(self->extra->length);
1237 if (!list)
1238 return NULL;
1239
1240 for (i = 0; i < self->extra->length; i++) {
1241 PyObject* item = self->extra->children[i];
1242 Py_INCREF(item);
1243 PyList_SET_ITEM(list, i, item);
1244 }
1245
1246 return list;
1247}
1248
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001249
Eli Bendersky64d11e62012-06-15 07:42:50 +03001250static PyObject *
1251create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1252
1253
1254static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001255element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001256{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001257 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001258 static char* kwlist[] = {"tag", 0};
1259
1260 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001261 return NULL;
1262
Eli Bendersky64d11e62012-06-15 07:42:50 +03001263 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001264}
1265
1266
1267static PyObject*
1268element_itertext(ElementObject* self, PyObject* args)
1269{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001270 if (!PyArg_ParseTuple(args, ":itertext"))
1271 return NULL;
1272
Eli Bendersky64d11e62012-06-15 07:42:50 +03001273 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001274}
1275
Eli Bendersky64d11e62012-06-15 07:42:50 +03001276
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001278element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001279{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001280 ElementObject* self = (ElementObject*) self_;
1281
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282 if (!self->extra || index < 0 || index >= self->extra->length) {
1283 PyErr_SetString(
1284 PyExc_IndexError,
1285 "child index out of range"
1286 );
1287 return NULL;
1288 }
1289
1290 Py_INCREF(self->extra->children[index]);
1291 return self->extra->children[index];
1292}
1293
1294static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001295element_insert(ElementObject* self, PyObject* args)
1296{
1297 int i;
1298
1299 int index;
1300 PyObject* element;
1301 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1302 &Element_Type, &element))
1303 return NULL;
1304
1305 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001306 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001307
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001308 if (index < 0) {
1309 index += self->extra->length;
1310 if (index < 0)
1311 index = 0;
1312 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001313 if (index > self->extra->length)
1314 index = self->extra->length;
1315
1316 if (element_resize(self, 1) < 0)
1317 return NULL;
1318
1319 for (i = self->extra->length; i > index; i--)
1320 self->extra->children[i] = self->extra->children[i-1];
1321
1322 Py_INCREF(element);
1323 self->extra->children[index] = element;
1324
1325 self->extra->length++;
1326
1327 Py_RETURN_NONE;
1328}
1329
1330static PyObject*
1331element_items(ElementObject* self, PyObject* args)
1332{
1333 if (!PyArg_ParseTuple(args, ":items"))
1334 return NULL;
1335
1336 if (!self->extra || self->extra->attrib == Py_None)
1337 return PyList_New(0);
1338
1339 return PyDict_Items(self->extra->attrib);
1340}
1341
1342static PyObject*
1343element_keys(ElementObject* self, PyObject* args)
1344{
1345 if (!PyArg_ParseTuple(args, ":keys"))
1346 return NULL;
1347
1348 if (!self->extra || self->extra->attrib == Py_None)
1349 return PyList_New(0);
1350
1351 return PyDict_Keys(self->extra->attrib);
1352}
1353
Martin v. Löwis18e16552006-02-15 17:27:45 +00001354static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001355element_length(ElementObject* self)
1356{
1357 if (!self->extra)
1358 return 0;
1359
1360 return self->extra->length;
1361}
1362
1363static PyObject*
1364element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1365{
1366 PyObject* elem;
1367
1368 PyObject* tag;
1369 PyObject* attrib;
1370 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1371 return NULL;
1372
1373 attrib = PyDict_Copy(attrib);
1374 if (!attrib)
1375 return NULL;
1376
Eli Bendersky092af1f2012-03-04 07:14:03 +02001377 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001378
1379 Py_DECREF(attrib);
1380
1381 return elem;
1382}
1383
1384static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001385element_remove(ElementObject* self, PyObject* args)
1386{
1387 int i;
1388
1389 PyObject* element;
1390 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1391 return NULL;
1392
1393 if (!self->extra) {
1394 /* element has no children, so raise exception */
1395 PyErr_SetString(
1396 PyExc_ValueError,
1397 "list.remove(x): x not in list"
1398 );
1399 return NULL;
1400 }
1401
1402 for (i = 0; i < self->extra->length; i++) {
1403 if (self->extra->children[i] == element)
1404 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001405 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001406 break;
1407 }
1408
1409 if (i == self->extra->length) {
1410 /* element is not in children, so raise exception */
1411 PyErr_SetString(
1412 PyExc_ValueError,
1413 "list.remove(x): x not in list"
1414 );
1415 return NULL;
1416 }
1417
1418 Py_DECREF(self->extra->children[i]);
1419
1420 self->extra->length--;
1421
1422 for (; i < self->extra->length; i++)
1423 self->extra->children[i] = self->extra->children[i+1];
1424
1425 Py_RETURN_NONE;
1426}
1427
1428static PyObject*
1429element_repr(ElementObject* self)
1430{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001431 if (self->tag)
1432 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1433 else
1434 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001435}
1436
1437static PyObject*
1438element_set(ElementObject* self, PyObject* args)
1439{
1440 PyObject* attrib;
1441
1442 PyObject* key;
1443 PyObject* value;
1444 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1445 return NULL;
1446
1447 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001448 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001449
1450 attrib = element_get_attrib(self);
1451 if (!attrib)
1452 return NULL;
1453
1454 if (PyDict_SetItem(attrib, key, value) < 0)
1455 return NULL;
1456
1457 Py_RETURN_NONE;
1458}
1459
1460static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001461element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001462{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001463 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001464 int i;
1465 PyObject* old;
1466
1467 if (!self->extra || index < 0 || index >= self->extra->length) {
1468 PyErr_SetString(
1469 PyExc_IndexError,
1470 "child assignment index out of range");
1471 return -1;
1472 }
1473
1474 old = self->extra->children[index];
1475
1476 if (item) {
1477 Py_INCREF(item);
1478 self->extra->children[index] = item;
1479 } else {
1480 self->extra->length--;
1481 for (i = index; i < self->extra->length; i++)
1482 self->extra->children[i] = self->extra->children[i+1];
1483 }
1484
1485 Py_DECREF(old);
1486
1487 return 0;
1488}
1489
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001490static PyObject*
1491element_subscr(PyObject* self_, PyObject* item)
1492{
1493 ElementObject* self = (ElementObject*) self_;
1494
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001495 if (PyIndex_Check(item)) {
1496 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001497
1498 if (i == -1 && PyErr_Occurred()) {
1499 return NULL;
1500 }
1501 if (i < 0 && self->extra)
1502 i += self->extra->length;
1503 return element_getitem(self_, i);
1504 }
1505 else if (PySlice_Check(item)) {
1506 Py_ssize_t start, stop, step, slicelen, cur, i;
1507 PyObject* list;
1508
1509 if (!self->extra)
1510 return PyList_New(0);
1511
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001512 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001513 self->extra->length,
1514 &start, &stop, &step, &slicelen) < 0) {
1515 return NULL;
1516 }
1517
1518 if (slicelen <= 0)
1519 return PyList_New(0);
1520 else {
1521 list = PyList_New(slicelen);
1522 if (!list)
1523 return NULL;
1524
1525 for (cur = start, i = 0; i < slicelen;
1526 cur += step, i++) {
1527 PyObject* item = self->extra->children[cur];
1528 Py_INCREF(item);
1529 PyList_SET_ITEM(list, i, item);
1530 }
1531
1532 return list;
1533 }
1534 }
1535 else {
1536 PyErr_SetString(PyExc_TypeError,
1537 "element indices must be integers");
1538 return NULL;
1539 }
1540}
1541
1542static int
1543element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1544{
1545 ElementObject* self = (ElementObject*) self_;
1546
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001547 if (PyIndex_Check(item)) {
1548 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001549
1550 if (i == -1 && PyErr_Occurred()) {
1551 return -1;
1552 }
1553 if (i < 0 && self->extra)
1554 i += self->extra->length;
1555 return element_setitem(self_, i, value);
1556 }
1557 else if (PySlice_Check(item)) {
1558 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1559
1560 PyObject* recycle = NULL;
1561 PyObject* seq = NULL;
1562
1563 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001564 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001565
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001566 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001567 self->extra->length,
1568 &start, &stop, &step, &slicelen) < 0) {
1569 return -1;
1570 }
1571
Eli Bendersky865756a2012-03-09 13:38:15 +02001572 if (value == NULL) {
1573 /* Delete slice */
1574 size_t cur;
1575 Py_ssize_t i;
1576
1577 if (slicelen <= 0)
1578 return 0;
1579
1580 /* Since we're deleting, the direction of the range doesn't matter,
1581 * so for simplicity make it always ascending.
1582 */
1583 if (step < 0) {
1584 stop = start + 1;
1585 start = stop + step * (slicelen - 1) - 1;
1586 step = -step;
1587 }
1588
1589 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1590
1591 /* recycle is a list that will contain all the children
1592 * scheduled for removal.
1593 */
1594 if (!(recycle = PyList_New(slicelen))) {
1595 PyErr_NoMemory();
1596 return -1;
1597 }
1598
1599 /* This loop walks over all the children that have to be deleted,
1600 * with cur pointing at them. num_moved is the amount of children
1601 * until the next deleted child that have to be "shifted down" to
1602 * occupy the deleted's places.
1603 * Note that in the ith iteration, shifting is done i+i places down
1604 * because i children were already removed.
1605 */
1606 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1607 /* Compute how many children have to be moved, clipping at the
1608 * list end.
1609 */
1610 Py_ssize_t num_moved = step - 1;
1611 if (cur + step >= (size_t)self->extra->length) {
1612 num_moved = self->extra->length - cur - 1;
1613 }
1614
1615 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1616
1617 memmove(
1618 self->extra->children + cur - i,
1619 self->extra->children + cur + 1,
1620 num_moved * sizeof(PyObject *));
1621 }
1622
1623 /* Leftover "tail" after the last removed child */
1624 cur = start + (size_t)slicelen * step;
1625 if (cur < (size_t)self->extra->length) {
1626 memmove(
1627 self->extra->children + cur - slicelen,
1628 self->extra->children + cur,
1629 (self->extra->length - cur) * sizeof(PyObject *));
1630 }
1631
1632 self->extra->length -= slicelen;
1633
1634 /* Discard the recycle list with all the deleted sub-elements */
1635 Py_XDECREF(recycle);
1636 return 0;
1637 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001638 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001639 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001640 seq = PySequence_Fast(value, "");
1641 if (!seq) {
1642 PyErr_Format(
1643 PyExc_TypeError,
1644 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1645 );
1646 return -1;
1647 }
1648 newlen = PySequence_Size(seq);
1649 }
1650
1651 if (step != 1 && newlen != slicelen)
1652 {
1653 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001654 "attempt to assign sequence of size %zd "
1655 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001656 newlen, slicelen
1657 );
1658 return -1;
1659 }
1660
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001661 /* Resize before creating the recycle bin, to prevent refleaks. */
1662 if (newlen > slicelen) {
1663 if (element_resize(self, newlen - slicelen) < 0) {
1664 if (seq) {
1665 Py_DECREF(seq);
1666 }
1667 return -1;
1668 }
1669 }
1670
1671 if (slicelen > 0) {
1672 /* to avoid recursive calls to this method (via decref), move
1673 old items to the recycle bin here, and get rid of them when
1674 we're done modifying the element */
1675 recycle = PyList_New(slicelen);
1676 if (!recycle) {
1677 if (seq) {
1678 Py_DECREF(seq);
1679 }
1680 return -1;
1681 }
1682 for (cur = start, i = 0; i < slicelen;
1683 cur += step, i++)
1684 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1685 }
1686
1687 if (newlen < slicelen) {
1688 /* delete slice */
1689 for (i = stop; i < self->extra->length; i++)
1690 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1691 } else if (newlen > slicelen) {
1692 /* insert slice */
1693 for (i = self->extra->length-1; i >= stop; i--)
1694 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1695 }
1696
1697 /* replace the slice */
1698 for (cur = start, i = 0; i < newlen;
1699 cur += step, i++) {
1700 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1701 Py_INCREF(element);
1702 self->extra->children[cur] = element;
1703 }
1704
1705 self->extra->length += newlen - slicelen;
1706
1707 if (seq) {
1708 Py_DECREF(seq);
1709 }
1710
1711 /* discard the recycle bin, and everything in it */
1712 Py_XDECREF(recycle);
1713
1714 return 0;
1715 }
1716 else {
1717 PyErr_SetString(PyExc_TypeError,
1718 "element indices must be integers");
1719 return -1;
1720 }
1721}
1722
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001723static PyMethodDef element_methods[] = {
1724
Eli Bendersky0192ba32012-03-30 16:38:33 +03001725 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001726
Eli Benderskya8736902013-01-05 06:26:39 -08001727 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001728 {"set", (PyCFunction) element_set, METH_VARARGS},
1729
Eli Bendersky737b1732012-05-29 06:02:56 +03001730 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1731 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1732 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001733
1734 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001735 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001736 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1737 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1738
Eli Benderskya8736902013-01-05 06:26:39 -08001739 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001740 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001741 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001742
Eli Benderskya8736902013-01-05 06:26:39 -08001743 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001744 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1745
1746 {"items", (PyCFunction) element_items, METH_VARARGS},
1747 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1748
1749 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1750
1751 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1752 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001753 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001754 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1755 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001756
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001757 {NULL, NULL}
1758};
1759
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001760static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001761element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001762{
1763 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001764 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001765
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001766 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001767 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001768
Alexander Belopolskye239d232010-12-08 23:31:48 +00001769 if (name == NULL)
1770 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001771
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001772 /* handle common attributes first */
1773 if (strcmp(name, "tag") == 0) {
1774 res = self->tag;
1775 Py_INCREF(res);
1776 return res;
1777 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001778 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001779 Py_INCREF(res);
1780 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001781 }
1782
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001783 /* methods */
1784 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1785 if (res)
1786 return res;
1787
1788 /* less common attributes */
1789 if (strcmp(name, "tail") == 0) {
1790 PyErr_Clear();
1791 res = element_get_tail(self);
1792 } else if (strcmp(name, "attrib") == 0) {
1793 PyErr_Clear();
1794 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001795 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001796 res = element_get_attrib(self);
1797 }
1798
1799 if (!res)
1800 return NULL;
1801
1802 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001803 return res;
1804}
1805
Eli Benderskyb20df952012-05-20 06:33:29 +03001806static PyObject*
1807element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001808{
Eli Benderskyb20df952012-05-20 06:33:29 +03001809 char *name = "";
1810 if (PyUnicode_Check(nameobj))
1811 name = _PyUnicode_AsString(nameobj);
1812
1813 if (name == NULL)
1814 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001815
1816 if (strcmp(name, "tag") == 0) {
1817 Py_DECREF(self->tag);
1818 self->tag = value;
1819 Py_INCREF(self->tag);
1820 } else if (strcmp(name, "text") == 0) {
1821 Py_DECREF(JOIN_OBJ(self->text));
1822 self->text = value;
1823 Py_INCREF(self->text);
1824 } else if (strcmp(name, "tail") == 0) {
1825 Py_DECREF(JOIN_OBJ(self->tail));
1826 self->tail = value;
1827 Py_INCREF(self->tail);
1828 } else if (strcmp(name, "attrib") == 0) {
1829 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001830 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001831 Py_DECREF(self->extra->attrib);
1832 self->extra->attrib = value;
1833 Py_INCREF(self->extra->attrib);
1834 } else {
1835 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001836 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001837 }
1838
Eli Benderskyb20df952012-05-20 06:33:29 +03001839 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001840}
1841
1842static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001843 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001844 0, /* sq_concat */
1845 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001846 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001847 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001848 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001849 0,
1850};
1851
1852static PyMappingMethods element_as_mapping = {
1853 (lenfunc) element_length,
1854 (binaryfunc) element_subscr,
1855 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001856};
1857
Neal Norwitz227b5332006-03-22 09:28:35 +00001858static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001859 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001860 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001861 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001862 (destructor)element_dealloc, /* tp_dealloc */
1863 0, /* tp_print */
1864 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001865 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001866 0, /* tp_reserved */
1867 (reprfunc)element_repr, /* tp_repr */
1868 0, /* tp_as_number */
1869 &element_as_sequence, /* tp_as_sequence */
1870 &element_as_mapping, /* tp_as_mapping */
1871 0, /* tp_hash */
1872 0, /* tp_call */
1873 0, /* tp_str */
1874 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001875 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001876 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001877 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1878 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001879 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001880 (traverseproc)element_gc_traverse, /* tp_traverse */
1881 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001882 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001883 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001884 0, /* tp_iter */
1885 0, /* tp_iternext */
1886 element_methods, /* tp_methods */
1887 0, /* tp_members */
1888 0, /* tp_getset */
1889 0, /* tp_base */
1890 0, /* tp_dict */
1891 0, /* tp_descr_get */
1892 0, /* tp_descr_set */
1893 0, /* tp_dictoffset */
1894 (initproc)element_init, /* tp_init */
1895 PyType_GenericAlloc, /* tp_alloc */
1896 element_new, /* tp_new */
1897 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001898};
1899
Eli Bendersky64d11e62012-06-15 07:42:50 +03001900/******************************* Element iterator ****************************/
1901
1902/* ElementIterObject represents the iteration state over an XML element in
1903 * pre-order traversal. To keep track of which sub-element should be returned
1904 * next, a stack of parents is maintained. This is a standard stack-based
1905 * iterative pre-order traversal of a tree.
1906 * The stack is managed using a single-linked list starting at parent_stack.
1907 * Each stack node contains the saved parent to which we should return after
1908 * the current one is exhausted, and the next child to examine in that parent.
1909 */
1910typedef struct ParentLocator_t {
1911 ElementObject *parent;
1912 Py_ssize_t child_index;
1913 struct ParentLocator_t *next;
1914} ParentLocator;
1915
1916typedef struct {
1917 PyObject_HEAD
1918 ParentLocator *parent_stack;
1919 ElementObject *root_element;
1920 PyObject *sought_tag;
1921 int root_done;
1922 int gettext;
1923} ElementIterObject;
1924
1925
1926static void
1927elementiter_dealloc(ElementIterObject *it)
1928{
1929 ParentLocator *p = it->parent_stack;
1930 while (p) {
1931 ParentLocator *temp = p;
1932 Py_XDECREF(p->parent);
1933 p = p->next;
1934 PyObject_Free(temp);
1935 }
1936
1937 Py_XDECREF(it->sought_tag);
1938 Py_XDECREF(it->root_element);
1939
1940 PyObject_GC_UnTrack(it);
1941 PyObject_GC_Del(it);
1942}
1943
1944static int
1945elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1946{
1947 ParentLocator *p = it->parent_stack;
1948 while (p) {
1949 Py_VISIT(p->parent);
1950 p = p->next;
1951 }
1952
1953 Py_VISIT(it->root_element);
1954 Py_VISIT(it->sought_tag);
1955 return 0;
1956}
1957
1958/* Helper function for elementiter_next. Add a new parent to the parent stack.
1959 */
1960static ParentLocator *
1961parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1962{
1963 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1964 if (new_node) {
1965 new_node->parent = parent;
1966 Py_INCREF(parent);
1967 new_node->child_index = 0;
1968 new_node->next = stack;
1969 }
1970 return new_node;
1971}
1972
1973static PyObject *
1974elementiter_next(ElementIterObject *it)
1975{
1976 /* Sub-element iterator.
1977 *
1978 * A short note on gettext: this function serves both the iter() and
1979 * itertext() methods to avoid code duplication. However, there are a few
1980 * small differences in the way these iterations work. Namely:
1981 * - itertext() only yields text from nodes that have it, and continues
1982 * iterating when a node doesn't have text (so it doesn't return any
1983 * node like iter())
1984 * - itertext() also has to handle tail, after finishing with all the
1985 * children of a node.
1986 */
Eli Bendersky113da642012-06-15 07:52:49 +03001987 ElementObject *cur_parent;
1988 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001989
1990 while (1) {
1991 /* Handle the case reached in the beginning and end of iteration, where
1992 * the parent stack is empty. The root_done flag gives us indication
1993 * whether we've just started iterating (so root_done is 0), in which
1994 * case the root is returned. If root_done is 1 and we're here, the
1995 * iterator is exhausted.
1996 */
1997 if (!it->parent_stack->parent) {
1998 if (it->root_done) {
1999 PyErr_SetNone(PyExc_StopIteration);
2000 return NULL;
2001 } else {
2002 it->parent_stack = parent_stack_push_new(it->parent_stack,
2003 it->root_element);
2004 if (!it->parent_stack) {
2005 PyErr_NoMemory();
2006 return NULL;
2007 }
2008
2009 it->root_done = 1;
2010 if (it->sought_tag == Py_None ||
2011 PyObject_RichCompareBool(it->root_element->tag,
2012 it->sought_tag, Py_EQ) == 1) {
2013 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002014 PyObject *text = element_get_text(it->root_element);
2015 if (!text)
2016 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002017 if (PyObject_IsTrue(text)) {
2018 Py_INCREF(text);
2019 return text;
2020 }
2021 } else {
2022 Py_INCREF(it->root_element);
2023 return (PyObject *)it->root_element;
2024 }
2025 }
2026 }
2027 }
2028
2029 /* See if there are children left to traverse in the current parent. If
2030 * yes, visit the next child. If not, pop the stack and try again.
2031 */
Eli Bendersky113da642012-06-15 07:52:49 +03002032 cur_parent = it->parent_stack->parent;
2033 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002034 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2035 ElementObject *child = (ElementObject *)
2036 cur_parent->extra->children[child_index];
2037 it->parent_stack->child_index++;
2038 it->parent_stack = parent_stack_push_new(it->parent_stack,
2039 child);
2040 if (!it->parent_stack) {
2041 PyErr_NoMemory();
2042 return NULL;
2043 }
2044
2045 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002046 PyObject *text = element_get_text(child);
2047 if (!text)
2048 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002049 if (PyObject_IsTrue(text)) {
2050 Py_INCREF(text);
2051 return text;
2052 }
2053 } else if (it->sought_tag == Py_None ||
2054 PyObject_RichCompareBool(child->tag,
2055 it->sought_tag, Py_EQ) == 1) {
2056 Py_INCREF(child);
2057 return (PyObject *)child;
2058 }
2059 else
2060 continue;
2061 }
2062 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002063 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002064 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002065 if (it->gettext) {
2066 tail = element_get_tail(cur_parent);
2067 if (!tail)
2068 return NULL;
2069 }
2070 else
2071 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002072 Py_XDECREF(it->parent_stack->parent);
2073 PyObject_Free(it->parent_stack);
2074 it->parent_stack = next;
2075
2076 /* Note that extra condition on it->parent_stack->parent here;
2077 * this is because itertext() is supposed to only return *inner*
2078 * text, not text following the element it began iteration with.
2079 */
2080 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2081 Py_INCREF(tail);
2082 return tail;
2083 }
2084 }
2085 }
2086
2087 return NULL;
2088}
2089
2090
2091static PyTypeObject ElementIter_Type = {
2092 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002093 /* Using the module's name since the pure-Python implementation does not
2094 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002095 "_elementtree._element_iterator", /* tp_name */
2096 sizeof(ElementIterObject), /* tp_basicsize */
2097 0, /* tp_itemsize */
2098 /* methods */
2099 (destructor)elementiter_dealloc, /* tp_dealloc */
2100 0, /* tp_print */
2101 0, /* tp_getattr */
2102 0, /* tp_setattr */
2103 0, /* tp_reserved */
2104 0, /* tp_repr */
2105 0, /* tp_as_number */
2106 0, /* tp_as_sequence */
2107 0, /* tp_as_mapping */
2108 0, /* tp_hash */
2109 0, /* tp_call */
2110 0, /* tp_str */
2111 0, /* tp_getattro */
2112 0, /* tp_setattro */
2113 0, /* tp_as_buffer */
2114 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2115 0, /* tp_doc */
2116 (traverseproc)elementiter_traverse, /* tp_traverse */
2117 0, /* tp_clear */
2118 0, /* tp_richcompare */
2119 0, /* tp_weaklistoffset */
2120 PyObject_SelfIter, /* tp_iter */
2121 (iternextfunc)elementiter_next, /* tp_iternext */
2122 0, /* tp_methods */
2123 0, /* tp_members */
2124 0, /* tp_getset */
2125 0, /* tp_base */
2126 0, /* tp_dict */
2127 0, /* tp_descr_get */
2128 0, /* tp_descr_set */
2129 0, /* tp_dictoffset */
2130 0, /* tp_init */
2131 0, /* tp_alloc */
2132 0, /* tp_new */
2133};
2134
2135
2136static PyObject *
2137create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2138{
2139 ElementIterObject *it;
2140 PyObject *star = NULL;
2141
2142 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2143 if (!it)
2144 return NULL;
2145 if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
2146 PyObject_GC_Del(it);
2147 return NULL;
2148 }
2149
2150 it->parent_stack->parent = NULL;
2151 it->parent_stack->child_index = 0;
2152 it->parent_stack->next = NULL;
2153
2154 if (PyUnicode_Check(tag))
2155 star = PyUnicode_FromString("*");
2156 else if (PyBytes_Check(tag))
2157 star = PyBytes_FromString("*");
2158
2159 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2160 tag = Py_None;
2161
2162 Py_XDECREF(star);
2163 it->sought_tag = tag;
2164 it->root_done = 0;
2165 it->gettext = gettext;
2166 it->root_element = self;
2167
2168 Py_INCREF(self);
2169 Py_INCREF(tag);
2170
2171 PyObject_GC_Track(it);
2172 return (PyObject *)it;
2173}
2174
2175
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002176/* ==================================================================== */
2177/* the tree builder type */
2178
2179typedef struct {
2180 PyObject_HEAD
2181
Eli Bendersky58d548d2012-05-29 15:45:16 +03002182 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002183
Antoine Pitrouee329312012-10-04 19:53:29 +02002184 PyObject *this; /* current node */
2185 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002186
Eli Bendersky58d548d2012-05-29 15:45:16 +03002187 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002188
Eli Bendersky58d548d2012-05-29 15:45:16 +03002189 PyObject *stack; /* element stack */
2190 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002191
Eli Bendersky48d358b2012-05-30 17:57:50 +03002192 PyObject *element_factory;
2193
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002194 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002195 PyObject *events; /* list of events, or NULL if not collecting */
2196 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2197 PyObject *end_event_obj;
2198 PyObject *start_ns_event_obj;
2199 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002200} TreeBuilderObject;
2201
Neal Norwitz227b5332006-03-22 09:28:35 +00002202static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002203
Christian Heimes90aa7642007-12-19 02:45:37 +00002204#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002205
2206/* -------------------------------------------------------------------- */
2207/* constructor and destructor */
2208
Eli Bendersky58d548d2012-05-29 15:45:16 +03002209static PyObject *
2210treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002211{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002212 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2213 if (t != NULL) {
2214 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002215
Eli Bendersky58d548d2012-05-29 15:45:16 +03002216 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002217 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002218 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002219 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002220
Eli Bendersky58d548d2012-05-29 15:45:16 +03002221 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002222 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002223 t->stack = PyList_New(20);
2224 if (!t->stack) {
2225 Py_DECREF(t->this);
2226 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002227 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002228 return NULL;
2229 }
2230 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002231
Eli Bendersky58d548d2012-05-29 15:45:16 +03002232 t->events = NULL;
2233 t->start_event_obj = t->end_event_obj = NULL;
2234 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2235 }
2236 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002237}
2238
Eli Bendersky58d548d2012-05-29 15:45:16 +03002239static int
2240treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002241{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002242 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002243 PyObject *element_factory = NULL;
2244 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002245 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002246
2247 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2248 &element_factory)) {
2249 return -1;
2250 }
2251
2252 if (element_factory) {
2253 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002254 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002255 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002256 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002257 }
2258
Eli Bendersky58d548d2012-05-29 15:45:16 +03002259 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002260}
2261
Eli Bendersky48d358b2012-05-30 17:57:50 +03002262static int
2263treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2264{
2265 Py_VISIT(self->root);
2266 Py_VISIT(self->this);
2267 Py_VISIT(self->last);
2268 Py_VISIT(self->data);
2269 Py_VISIT(self->stack);
2270 Py_VISIT(self->element_factory);
2271 return 0;
2272}
2273
2274static int
2275treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002276{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002277 Py_CLEAR(self->end_ns_event_obj);
2278 Py_CLEAR(self->start_ns_event_obj);
2279 Py_CLEAR(self->end_event_obj);
2280 Py_CLEAR(self->start_event_obj);
2281 Py_CLEAR(self->events);
2282 Py_CLEAR(self->stack);
2283 Py_CLEAR(self->data);
2284 Py_CLEAR(self->last);
2285 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002286 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002287 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002288 return 0;
2289}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002290
Eli Bendersky48d358b2012-05-30 17:57:50 +03002291static void
2292treebuilder_dealloc(TreeBuilderObject *self)
2293{
2294 PyObject_GC_UnTrack(self);
2295 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002296 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002297}
2298
2299/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002300/* helpers for handling of arbitrary element-like objects */
2301
2302static int
2303treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2304 PyObject **dest, _Py_Identifier *name)
2305{
2306 if (Element_CheckExact(element)) {
2307 Py_DECREF(JOIN_OBJ(*dest));
2308 *dest = JOIN_SET(data, PyList_CheckExact(data));
2309 return 0;
2310 }
2311 else {
2312 PyObject *joined = list_join(data);
2313 int r;
2314 if (joined == NULL)
2315 return -1;
2316 r = _PyObject_SetAttrId(element, name, joined);
2317 Py_DECREF(joined);
2318 return r;
2319 }
2320}
2321
2322/* These two functions steal a reference to data */
2323static int
2324treebuilder_set_element_text(PyObject *element, PyObject *data)
2325{
2326 _Py_IDENTIFIER(text);
2327 return treebuilder_set_element_text_or_tail(
2328 element, data, &((ElementObject *) element)->text, &PyId_text);
2329}
2330
2331static int
2332treebuilder_set_element_tail(PyObject *element, PyObject *data)
2333{
2334 _Py_IDENTIFIER(tail);
2335 return treebuilder_set_element_text_or_tail(
2336 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2337}
2338
2339static int
2340treebuilder_add_subelement(PyObject *element, PyObject *child)
2341{
2342 _Py_IDENTIFIER(append);
2343 if (Element_CheckExact(element)) {
2344 ElementObject *elem = (ElementObject *) element;
2345 return element_add_subelement(elem, child);
2346 }
2347 else {
2348 PyObject *res;
2349 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2350 if (res == NULL)
2351 return -1;
2352 Py_DECREF(res);
2353 return 0;
2354 }
2355}
2356
2357/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002358/* handlers */
2359
2360LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002361treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2362 PyObject* attrib)
2363{
2364 PyObject* node;
2365 PyObject* this;
2366
2367 if (self->data) {
2368 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002369 if (treebuilder_set_element_text(self->last, self->data))
2370 return NULL;
2371 }
2372 else {
2373 if (treebuilder_set_element_tail(self->last, self->data))
2374 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002375 }
2376 self->data = NULL;
2377 }
2378
Eli Bendersky48d358b2012-05-30 17:57:50 +03002379 if (self->element_factory) {
2380 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2381 } else {
2382 node = create_new_element(tag, attrib);
2383 }
2384 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002385 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002386 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002387
Antoine Pitrouee329312012-10-04 19:53:29 +02002388 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002389
2390 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002391 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002392 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002393 } else {
2394 if (self->root) {
2395 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002396 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002397 "multiple elements on top level"
2398 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002399 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002400 }
2401 Py_INCREF(node);
2402 self->root = node;
2403 }
2404
2405 if (self->index < PyList_GET_SIZE(self->stack)) {
2406 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002407 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002408 Py_INCREF(this);
2409 } else {
2410 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002411 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002412 }
2413 self->index++;
2414
2415 Py_DECREF(this);
2416 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002417 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002418
2419 Py_DECREF(self->last);
2420 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002421 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002422
2423 if (self->start_event_obj) {
2424 PyObject* res;
2425 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002426 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002427 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002428 PyList_Append(self->events, res);
2429 Py_DECREF(res);
2430 } else
2431 PyErr_Clear(); /* FIXME: propagate error */
2432 }
2433
2434 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002435
2436 error:
2437 Py_DECREF(node);
2438 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002439}
2440
2441LOCAL(PyObject*)
2442treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2443{
2444 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002445 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002446 /* ignore calls to data before the first call to start */
2447 Py_RETURN_NONE;
2448 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002449 /* store the first item as is */
2450 Py_INCREF(data); self->data = data;
2451 } else {
2452 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002453 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2454 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002455 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002456 /* expat often generates single character data sections; handle
2457 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002458 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2459 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002460 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002461 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002462 } else if (PyList_CheckExact(self->data)) {
2463 if (PyList_Append(self->data, data) < 0)
2464 return NULL;
2465 } else {
2466 PyObject* list = PyList_New(2);
2467 if (!list)
2468 return NULL;
2469 PyList_SET_ITEM(list, 0, self->data);
2470 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2471 self->data = list;
2472 }
2473 }
2474
2475 Py_RETURN_NONE;
2476}
2477
2478LOCAL(PyObject*)
2479treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2480{
2481 PyObject* item;
2482
2483 if (self->data) {
2484 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002485 if (treebuilder_set_element_text(self->last, self->data))
2486 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002487 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002488 if (treebuilder_set_element_tail(self->last, self->data))
2489 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002490 }
2491 self->data = NULL;
2492 }
2493
2494 if (self->index == 0) {
2495 PyErr_SetString(
2496 PyExc_IndexError,
2497 "pop from empty stack"
2498 );
2499 return NULL;
2500 }
2501
2502 self->index--;
2503
2504 item = PyList_GET_ITEM(self->stack, self->index);
2505 Py_INCREF(item);
2506
2507 Py_DECREF(self->last);
2508
Antoine Pitrouee329312012-10-04 19:53:29 +02002509 self->last = self->this;
2510 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002511
2512 if (self->end_event_obj) {
2513 PyObject* res;
2514 PyObject* action = self->end_event_obj;
2515 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002516 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002517 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002518 PyList_Append(self->events, res);
2519 Py_DECREF(res);
2520 } else
2521 PyErr_Clear(); /* FIXME: propagate error */
2522 }
2523
2524 Py_INCREF(self->last);
2525 return (PyObject*) self->last;
2526}
2527
2528LOCAL(void)
2529treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002530 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002531{
2532 PyObject* res;
2533 PyObject* action;
2534 PyObject* parcel;
2535
2536 if (!self->events)
2537 return;
2538
2539 if (start) {
2540 if (!self->start_ns_event_obj)
2541 return;
2542 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002543 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544 if (!parcel)
2545 return;
2546 Py_INCREF(action);
2547 } else {
2548 if (!self->end_ns_event_obj)
2549 return;
2550 action = self->end_ns_event_obj;
2551 Py_INCREF(action);
2552 parcel = Py_None;
2553 Py_INCREF(parcel);
2554 }
2555
2556 res = PyTuple_New(2);
2557
2558 if (res) {
2559 PyTuple_SET_ITEM(res, 0, action);
2560 PyTuple_SET_ITEM(res, 1, parcel);
2561 PyList_Append(self->events, res);
2562 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002563 }
2564 else {
2565 Py_DECREF(action);
2566 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002567 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002568 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002569}
2570
2571/* -------------------------------------------------------------------- */
2572/* methods (in alphabetical order) */
2573
2574static PyObject*
2575treebuilder_data(TreeBuilderObject* self, PyObject* args)
2576{
2577 PyObject* data;
2578 if (!PyArg_ParseTuple(args, "O:data", &data))
2579 return NULL;
2580
2581 return treebuilder_handle_data(self, data);
2582}
2583
2584static PyObject*
2585treebuilder_end(TreeBuilderObject* self, PyObject* args)
2586{
2587 PyObject* tag;
2588 if (!PyArg_ParseTuple(args, "O:end", &tag))
2589 return NULL;
2590
2591 return treebuilder_handle_end(self, tag);
2592}
2593
2594LOCAL(PyObject*)
2595treebuilder_done(TreeBuilderObject* self)
2596{
2597 PyObject* res;
2598
2599 /* FIXME: check stack size? */
2600
2601 if (self->root)
2602 res = self->root;
2603 else
2604 res = Py_None;
2605
2606 Py_INCREF(res);
2607 return res;
2608}
2609
2610static PyObject*
2611treebuilder_close(TreeBuilderObject* self, PyObject* args)
2612{
2613 if (!PyArg_ParseTuple(args, ":close"))
2614 return NULL;
2615
2616 return treebuilder_done(self);
2617}
2618
2619static PyObject*
2620treebuilder_start(TreeBuilderObject* self, PyObject* args)
2621{
2622 PyObject* tag;
2623 PyObject* attrib = Py_None;
2624 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2625 return NULL;
2626
2627 return treebuilder_handle_start(self, tag, attrib);
2628}
2629
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002630static PyMethodDef treebuilder_methods[] = {
2631 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2632 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2633 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002634 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2635 {NULL, NULL}
2636};
2637
Neal Norwitz227b5332006-03-22 09:28:35 +00002638static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002639 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002640 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002641 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002642 (destructor)treebuilder_dealloc, /* tp_dealloc */
2643 0, /* tp_print */
2644 0, /* tp_getattr */
2645 0, /* tp_setattr */
2646 0, /* tp_reserved */
2647 0, /* tp_repr */
2648 0, /* tp_as_number */
2649 0, /* tp_as_sequence */
2650 0, /* tp_as_mapping */
2651 0, /* tp_hash */
2652 0, /* tp_call */
2653 0, /* tp_str */
2654 0, /* tp_getattro */
2655 0, /* tp_setattro */
2656 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002657 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2658 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002659 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002660 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2661 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002662 0, /* tp_richcompare */
2663 0, /* tp_weaklistoffset */
2664 0, /* tp_iter */
2665 0, /* tp_iternext */
2666 treebuilder_methods, /* tp_methods */
2667 0, /* tp_members */
2668 0, /* tp_getset */
2669 0, /* tp_base */
2670 0, /* tp_dict */
2671 0, /* tp_descr_get */
2672 0, /* tp_descr_set */
2673 0, /* tp_dictoffset */
2674 (initproc)treebuilder_init, /* tp_init */
2675 PyType_GenericAlloc, /* tp_alloc */
2676 treebuilder_new, /* tp_new */
2677 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002678};
2679
2680/* ==================================================================== */
2681/* the expat interface */
2682
2683#if defined(USE_EXPAT)
2684
2685#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002686#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002687static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002688#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002689
Eli Bendersky52467b12012-06-01 07:13:08 +03002690static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2691 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2692
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002693typedef struct {
2694 PyObject_HEAD
2695
2696 XML_Parser parser;
2697
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002698 PyObject *target;
2699 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002700
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002701 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002702
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002703 PyObject *handle_start;
2704 PyObject *handle_data;
2705 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002707 PyObject *handle_comment;
2708 PyObject *handle_pi;
2709 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002711 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002712
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002713} XMLParserObject;
2714
Neal Norwitz227b5332006-03-22 09:28:35 +00002715static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002717#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2718
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002719/* helpers */
2720
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002721LOCAL(PyObject*)
2722makeuniversal(XMLParserObject* self, const char* string)
2723{
2724 /* convert a UTF-8 tag/attribute name from the expat parser
2725 to a universal name string */
2726
Antoine Pitrouc1948842012-10-01 23:40:37 +02002727 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002728 PyObject* key;
2729 PyObject* value;
2730
2731 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002732 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002733 if (!key)
2734 return NULL;
2735
2736 value = PyDict_GetItem(self->names, key);
2737
2738 if (value) {
2739 Py_INCREF(value);
2740 } else {
2741 /* new name. convert to universal name, and decode as
2742 necessary */
2743
2744 PyObject* tag;
2745 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002746 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002747
2748 /* look for namespace separator */
2749 for (i = 0; i < size; i++)
2750 if (string[i] == '}')
2751 break;
2752 if (i != size) {
2753 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002754 tag = PyBytes_FromStringAndSize(NULL, size+1);
2755 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002756 p[0] = '{';
2757 memcpy(p+1, string, size);
2758 size++;
2759 } else {
2760 /* plain name; use key as tag */
2761 Py_INCREF(key);
2762 tag = key;
2763 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002764
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002765 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002766 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002767 value = PyUnicode_DecodeUTF8(p, size, "strict");
2768 Py_DECREF(tag);
2769 if (!value) {
2770 Py_DECREF(key);
2771 return NULL;
2772 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002773
2774 /* add to names dictionary */
2775 if (PyDict_SetItem(self->names, key, value) < 0) {
2776 Py_DECREF(key);
2777 Py_DECREF(value);
2778 return NULL;
2779 }
2780 }
2781
2782 Py_DECREF(key);
2783 return value;
2784}
2785
Eli Bendersky5b77d812012-03-16 08:20:05 +02002786/* Set the ParseError exception with the given parameters.
2787 * If message is not NULL, it's used as the error string. Otherwise, the
2788 * message string is the default for the given error_code.
2789*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002790static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002791expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002792{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002793 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002794
Victor Stinner499dfcf2011-03-21 13:26:24 +01002795 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002796 message ? message : EXPAT(ErrorString)(error_code),
2797 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002798 if (errmsg == NULL)
2799 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002800
Victor Stinner499dfcf2011-03-21 13:26:24 +01002801 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2802 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002803 if (!error)
2804 return;
2805
Eli Bendersky5b77d812012-03-16 08:20:05 +02002806 /* Add code and position attributes */
2807 code = PyLong_FromLong((long)error_code);
2808 if (!code) {
2809 Py_DECREF(error);
2810 return;
2811 }
2812 if (PyObject_SetAttrString(error, "code", code) == -1) {
2813 Py_DECREF(error);
2814 Py_DECREF(code);
2815 return;
2816 }
2817 Py_DECREF(code);
2818
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002819 position = Py_BuildValue("(ii)", line, column);
2820 if (!position) {
2821 Py_DECREF(error);
2822 return;
2823 }
2824 if (PyObject_SetAttrString(error, "position", position) == -1) {
2825 Py_DECREF(error);
2826 Py_DECREF(position);
2827 return;
2828 }
2829 Py_DECREF(position);
2830
2831 PyErr_SetObject(elementtree_parseerror_obj, error);
2832 Py_DECREF(error);
2833}
2834
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002835/* -------------------------------------------------------------------- */
2836/* handlers */
2837
2838static void
2839expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2840 int data_len)
2841{
2842 PyObject* key;
2843 PyObject* value;
2844 PyObject* res;
2845
2846 if (data_len < 2 || data_in[0] != '&')
2847 return;
2848
Neal Norwitz0269b912007-08-08 06:56:02 +00002849 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002850 if (!key)
2851 return;
2852
2853 value = PyDict_GetItem(self->entity, key);
2854
2855 if (value) {
2856 if (TreeBuilder_CheckExact(self->target))
2857 res = treebuilder_handle_data(
2858 (TreeBuilderObject*) self->target, value
2859 );
2860 else if (self->handle_data)
2861 res = PyObject_CallFunction(self->handle_data, "O", value);
2862 else
2863 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002864 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002865 } else if (!PyErr_Occurred()) {
2866 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002867 char message[128] = "undefined entity ";
2868 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002869 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002870 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002871 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002872 EXPAT(GetErrorColumnNumber)(self->parser),
2873 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002874 );
2875 }
2876
2877 Py_DECREF(key);
2878}
2879
2880static void
2881expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2882 const XML_Char **attrib_in)
2883{
2884 PyObject* res;
2885 PyObject* tag;
2886 PyObject* attrib;
2887 int ok;
2888
2889 /* tag name */
2890 tag = makeuniversal(self, tag_in);
2891 if (!tag)
2892 return; /* parser will look for errors */
2893
2894 /* attributes */
2895 if (attrib_in[0]) {
2896 attrib = PyDict_New();
2897 if (!attrib)
2898 return;
2899 while (attrib_in[0] && attrib_in[1]) {
2900 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002901 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002902 if (!key || !value) {
2903 Py_XDECREF(value);
2904 Py_XDECREF(key);
2905 Py_DECREF(attrib);
2906 return;
2907 }
2908 ok = PyDict_SetItem(attrib, key, value);
2909 Py_DECREF(value);
2910 Py_DECREF(key);
2911 if (ok < 0) {
2912 Py_DECREF(attrib);
2913 return;
2914 }
2915 attrib_in += 2;
2916 }
2917 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002918 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002919 attrib = PyDict_New();
2920 if (!attrib)
2921 return;
2922 }
2923
2924 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002925 /* shortcut */
2926 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2927 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002928 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002929 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002930 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002931 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002932 res = NULL;
2933
2934 Py_DECREF(tag);
2935 Py_DECREF(attrib);
2936
2937 Py_XDECREF(res);
2938}
2939
2940static void
2941expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2942 int data_len)
2943{
2944 PyObject* data;
2945 PyObject* res;
2946
Neal Norwitz0269b912007-08-08 06:56:02 +00002947 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002948 if (!data)
2949 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002950
2951 if (TreeBuilder_CheckExact(self->target))
2952 /* shortcut */
2953 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2954 else if (self->handle_data)
2955 res = PyObject_CallFunction(self->handle_data, "O", data);
2956 else
2957 res = NULL;
2958
2959 Py_DECREF(data);
2960
2961 Py_XDECREF(res);
2962}
2963
2964static void
2965expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2966{
2967 PyObject* tag;
2968 PyObject* res = NULL;
2969
2970 if (TreeBuilder_CheckExact(self->target))
2971 /* shortcut */
2972 /* the standard tree builder doesn't look at the end tag */
2973 res = treebuilder_handle_end(
2974 (TreeBuilderObject*) self->target, Py_None
2975 );
2976 else if (self->handle_end) {
2977 tag = makeuniversal(self, tag_in);
2978 if (tag) {
2979 res = PyObject_CallFunction(self->handle_end, "O", tag);
2980 Py_DECREF(tag);
2981 }
2982 }
2983
2984 Py_XDECREF(res);
2985}
2986
2987static void
2988expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2989 const XML_Char *uri)
2990{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002991 PyObject* sprefix = NULL;
2992 PyObject* suri = NULL;
2993
2994 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2995 if (!suri)
2996 return;
2997
2998 if (prefix)
2999 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3000 else
3001 sprefix = PyUnicode_FromString("");
3002 if (!sprefix) {
3003 Py_DECREF(suri);
3004 return;
3005 }
3006
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003007 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003008 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003009 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003010
3011 Py_DECREF(sprefix);
3012 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003013}
3014
3015static void
3016expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3017{
3018 treebuilder_handle_namespace(
3019 (TreeBuilderObject*) self->target, 0, NULL, NULL
3020 );
3021}
3022
3023static void
3024expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3025{
3026 PyObject* comment;
3027 PyObject* res;
3028
3029 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003030 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003031 if (comment) {
3032 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3033 Py_XDECREF(res);
3034 Py_DECREF(comment);
3035 }
3036 }
3037}
3038
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003039static void
3040expat_start_doctype_handler(XMLParserObject *self,
3041 const XML_Char *doctype_name,
3042 const XML_Char *sysid,
3043 const XML_Char *pubid,
3044 int has_internal_subset)
3045{
3046 PyObject *self_pyobj = (PyObject *)self;
3047 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3048 PyObject *parser_doctype = NULL;
3049 PyObject *res = NULL;
3050
3051 doctype_name_obj = makeuniversal(self, doctype_name);
3052 if (!doctype_name_obj)
3053 return;
3054
3055 if (sysid) {
3056 sysid_obj = makeuniversal(self, sysid);
3057 if (!sysid_obj) {
3058 Py_DECREF(doctype_name_obj);
3059 return;
3060 }
3061 } else {
3062 Py_INCREF(Py_None);
3063 sysid_obj = Py_None;
3064 }
3065
3066 if (pubid) {
3067 pubid_obj = makeuniversal(self, pubid);
3068 if (!pubid_obj) {
3069 Py_DECREF(doctype_name_obj);
3070 Py_DECREF(sysid_obj);
3071 return;
3072 }
3073 } else {
3074 Py_INCREF(Py_None);
3075 pubid_obj = Py_None;
3076 }
3077
3078 /* If the target has a handler for doctype, call it. */
3079 if (self->handle_doctype) {
3080 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3081 doctype_name_obj, pubid_obj, sysid_obj);
3082 Py_CLEAR(res);
3083 }
3084
3085 /* Now see if the parser itself has a doctype method. If yes and it's
3086 * a subclass, call it but warn about deprecation. If it's not a subclass
3087 * (i.e. vanilla XMLParser), do nothing.
3088 */
3089 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3090 if (parser_doctype) {
3091 if (!XMLParser_CheckExact(self_pyobj)) {
3092 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3093 "This method of XMLParser is deprecated. Define"
3094 " doctype() method on the TreeBuilder target.",
3095 1) < 0) {
3096 goto clear;
3097 }
3098 res = PyObject_CallFunction(parser_doctype, "OOO",
3099 doctype_name_obj, pubid_obj, sysid_obj);
3100 Py_CLEAR(res);
3101 }
3102 }
3103
3104clear:
3105 Py_XDECREF(parser_doctype);
3106 Py_DECREF(doctype_name_obj);
3107 Py_DECREF(pubid_obj);
3108 Py_DECREF(sysid_obj);
3109}
3110
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003111static void
3112expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3113 const XML_Char* data_in)
3114{
3115 PyObject* target;
3116 PyObject* data;
3117 PyObject* res;
3118
3119 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003120 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3121 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003122 if (target && data) {
3123 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3124 Py_XDECREF(res);
3125 Py_DECREF(data);
3126 Py_DECREF(target);
3127 } else {
3128 Py_XDECREF(data);
3129 Py_XDECREF(target);
3130 }
3131 }
3132}
3133
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003134static int
3135expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
3136 XML_Encoding *info)
3137{
3138 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003139 unsigned char s[256];
3140 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003141 void *data;
3142 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003143
3144 memset(info, 0, sizeof(XML_Encoding));
3145
3146 for (i = 0; i < 256; i++)
3147 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003148
Fredrik Lundhc3389992005-12-25 11:40:19 +00003149 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003150 if (!u)
3151 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003152 if (PyUnicode_READY(u))
3153 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003154
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003155 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003156 Py_DECREF(u);
3157 return XML_STATUS_ERROR;
3158 }
3159
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003160 kind = PyUnicode_KIND(u);
3161 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003162 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003163 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
3164 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
3165 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003166 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003167 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003168 }
3169
3170 Py_DECREF(u);
3171
3172 return XML_STATUS_OK;
3173}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003174
3175/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003176
Eli Bendersky52467b12012-06-01 07:13:08 +03003177static PyObject *
3178xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003179{
Eli Bendersky52467b12012-06-01 07:13:08 +03003180 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3181 if (self) {
3182 self->parser = NULL;
3183 self->target = self->entity = self->names = NULL;
3184 self->handle_start = self->handle_data = self->handle_end = NULL;
3185 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003186 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003187 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003188 return (PyObject *)self;
3189}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003190
Eli Bendersky52467b12012-06-01 07:13:08 +03003191static int
3192xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3193{
3194 XMLParserObject *self_xp = (XMLParserObject *)self;
3195 PyObject *target = NULL, *html = NULL;
3196 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003197 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003198
Eli Bendersky52467b12012-06-01 07:13:08 +03003199 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3200 &html, &target, &encoding)) {
3201 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003202 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003203
Eli Bendersky52467b12012-06-01 07:13:08 +03003204 self_xp->entity = PyDict_New();
3205 if (!self_xp->entity)
3206 return -1;
3207
3208 self_xp->names = PyDict_New();
3209 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003210 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003211 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003212 }
3213
Eli Bendersky52467b12012-06-01 07:13:08 +03003214 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3215 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003216 Py_CLEAR(self_xp->entity);
3217 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003218 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003219 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003220 }
3221
Eli Bendersky52467b12012-06-01 07:13:08 +03003222 if (target) {
3223 Py_INCREF(target);
3224 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003225 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003226 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003227 Py_CLEAR(self_xp->entity);
3228 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003229 EXPAT(ParserFree)(self_xp->parser);
3230 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003231 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003232 }
3233 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003234
Eli Bendersky52467b12012-06-01 07:13:08 +03003235 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3236 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3237 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3238 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3239 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3240 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003241 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003242
3243 PyErr_Clear();
Eli Bendersky52467b12012-06-01 07:13:08 +03003244
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003245 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003246 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003247 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003248 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003249 (XML_StartElementHandler) expat_start_handler,
3250 (XML_EndElementHandler) expat_end_handler
3251 );
3252 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003253 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003254 (XML_DefaultHandler) expat_default_handler
3255 );
3256 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003257 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003258 (XML_CharacterDataHandler) expat_data_handler
3259 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003260 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003261 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003262 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003263 (XML_CommentHandler) expat_comment_handler
3264 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003265 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003266 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003267 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003268 (XML_ProcessingInstructionHandler) expat_pi_handler
3269 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003270 EXPAT(SetStartDoctypeDeclHandler)(
3271 self_xp->parser,
3272 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3273 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003275 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
3277 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003278
Eli Bendersky52467b12012-06-01 07:13:08 +03003279 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003280}
3281
Eli Bendersky52467b12012-06-01 07:13:08 +03003282static int
3283xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3284{
3285 Py_VISIT(self->handle_close);
3286 Py_VISIT(self->handle_pi);
3287 Py_VISIT(self->handle_comment);
3288 Py_VISIT(self->handle_end);
3289 Py_VISIT(self->handle_data);
3290 Py_VISIT(self->handle_start);
3291
3292 Py_VISIT(self->target);
3293 Py_VISIT(self->entity);
3294 Py_VISIT(self->names);
3295
3296 return 0;
3297}
3298
3299static int
3300xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003301{
3302 EXPAT(ParserFree)(self->parser);
3303
Antoine Pitrouc1948842012-10-01 23:40:37 +02003304 Py_CLEAR(self->handle_close);
3305 Py_CLEAR(self->handle_pi);
3306 Py_CLEAR(self->handle_comment);
3307 Py_CLEAR(self->handle_end);
3308 Py_CLEAR(self->handle_data);
3309 Py_CLEAR(self->handle_start);
3310 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003311
Antoine Pitrouc1948842012-10-01 23:40:37 +02003312 Py_CLEAR(self->target);
3313 Py_CLEAR(self->entity);
3314 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003315
Eli Bendersky52467b12012-06-01 07:13:08 +03003316 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003317}
3318
Eli Bendersky52467b12012-06-01 07:13:08 +03003319static void
3320xmlparser_dealloc(XMLParserObject* self)
3321{
3322 PyObject_GC_UnTrack(self);
3323 xmlparser_gc_clear(self);
3324 Py_TYPE(self)->tp_free((PyObject *)self);
3325}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003326
3327LOCAL(PyObject*)
3328expat_parse(XMLParserObject* self, char* data, int data_len, int final)
3329{
3330 int ok;
3331
3332 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3333
3334 if (PyErr_Occurred())
3335 return NULL;
3336
3337 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003338 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003339 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003340 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003341 EXPAT(GetErrorColumnNumber)(self->parser),
3342 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003343 );
3344 return NULL;
3345 }
3346
3347 Py_RETURN_NONE;
3348}
3349
3350static PyObject*
3351xmlparser_close(XMLParserObject* self, PyObject* args)
3352{
3353 /* end feeding data to parser */
3354
3355 PyObject* res;
3356 if (!PyArg_ParseTuple(args, ":close"))
3357 return NULL;
3358
3359 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003360 if (!res)
3361 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003362
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003363 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003364 Py_DECREF(res);
3365 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003366 } if (self->handle_close) {
3367 Py_DECREF(res);
3368 return PyObject_CallFunction(self->handle_close, "");
3369 } else
3370 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003371}
3372
3373static PyObject*
3374xmlparser_feed(XMLParserObject* self, PyObject* args)
3375{
3376 /* feed data to parser */
3377
3378 char* data;
3379 int data_len;
3380 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
3381 return NULL;
3382
3383 return expat_parse(self, data, data_len, 0);
3384}
3385
3386static PyObject*
3387xmlparser_parse(XMLParserObject* self, PyObject* args)
3388{
3389 /* (internal) parse until end of input stream */
3390
3391 PyObject* reader;
3392 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003393 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003394 PyObject* res;
3395
3396 PyObject* fileobj;
3397 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3398 return NULL;
3399
3400 reader = PyObject_GetAttrString(fileobj, "read");
3401 if (!reader)
3402 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003403
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003404 /* read from open file object */
3405 for (;;) {
3406
3407 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3408
3409 if (!buffer) {
3410 /* read failed (e.g. due to KeyboardInterrupt) */
3411 Py_DECREF(reader);
3412 return NULL;
3413 }
3414
Eli Benderskyf996e772012-03-16 05:53:30 +02003415 if (PyUnicode_CheckExact(buffer)) {
3416 /* A unicode object is encoded into bytes using UTF-8 */
3417 if (PyUnicode_GET_SIZE(buffer) == 0) {
3418 Py_DECREF(buffer);
3419 break;
3420 }
3421 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003422 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003423 if (!temp) {
3424 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003425 Py_DECREF(reader);
3426 return NULL;
3427 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003428 buffer = temp;
3429 }
3430 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003431 Py_DECREF(buffer);
3432 break;
3433 }
3434
3435 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003436 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003437 );
3438
3439 Py_DECREF(buffer);
3440
3441 if (!res) {
3442 Py_DECREF(reader);
3443 return NULL;
3444 }
3445 Py_DECREF(res);
3446
3447 }
3448
3449 Py_DECREF(reader);
3450
3451 res = expat_parse(self, "", 0, 1);
3452
3453 if (res && TreeBuilder_CheckExact(self->target)) {
3454 Py_DECREF(res);
3455 return treebuilder_done((TreeBuilderObject*) self->target);
3456 }
3457
3458 return res;
3459}
3460
3461static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003462xmlparser_doctype(XMLParserObject *self, PyObject *args)
3463{
3464 Py_RETURN_NONE;
3465}
3466
3467static PyObject*
3468xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003469{
3470 /* activate element event reporting */
3471
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003472 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003473 TreeBuilderObject* target;
3474
3475 PyObject* events; /* event collector */
3476 PyObject* event_set = Py_None;
3477 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
3478 &event_set))
3479 return NULL;
3480
3481 if (!TreeBuilder_CheckExact(self->target)) {
3482 PyErr_SetString(
3483 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003484 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003485 "targets"
3486 );
3487 return NULL;
3488 }
3489
3490 target = (TreeBuilderObject*) self->target;
3491
3492 Py_INCREF(events);
3493 Py_XDECREF(target->events);
3494 target->events = events;
3495
3496 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003497 Py_CLEAR(target->start_event_obj);
3498 Py_CLEAR(target->end_event_obj);
3499 Py_CLEAR(target->start_ns_event_obj);
3500 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003501
3502 if (event_set == Py_None) {
3503 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003504 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003505 Py_RETURN_NONE;
3506 }
3507
3508 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3509 goto error;
3510
3511 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3512 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3513 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003514 if (PyUnicode_Check(item)) {
3515 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003516 if (event == NULL)
3517 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003518 } else if (PyBytes_Check(item))
3519 event = PyBytes_AS_STRING(item);
3520 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003521 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003522 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003523 if (strcmp(event, "start") == 0) {
3524 Py_INCREF(item);
3525 target->start_event_obj = item;
3526 } else if (strcmp(event, "end") == 0) {
3527 Py_INCREF(item);
3528 Py_XDECREF(target->end_event_obj);
3529 target->end_event_obj = item;
3530 } else if (strcmp(event, "start-ns") == 0) {
3531 Py_INCREF(item);
3532 Py_XDECREF(target->start_ns_event_obj);
3533 target->start_ns_event_obj = item;
3534 EXPAT(SetNamespaceDeclHandler)(
3535 self->parser,
3536 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3537 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3538 );
3539 } else if (strcmp(event, "end-ns") == 0) {
3540 Py_INCREF(item);
3541 Py_XDECREF(target->end_ns_event_obj);
3542 target->end_ns_event_obj = item;
3543 EXPAT(SetNamespaceDeclHandler)(
3544 self->parser,
3545 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3546 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3547 );
3548 } else {
3549 PyErr_Format(
3550 PyExc_ValueError,
3551 "unknown event '%s'", event
3552 );
3553 return NULL;
3554 }
3555 }
3556
3557 Py_RETURN_NONE;
3558
3559 error:
3560 PyErr_SetString(
3561 PyExc_TypeError,
3562 "invalid event tuple"
3563 );
3564 return NULL;
3565}
3566
3567static PyMethodDef xmlparser_methods[] = {
3568 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
3569 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3570 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3571 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003572 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003573 {NULL, NULL}
3574};
3575
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003576static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003577xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003578{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003579 if (PyUnicode_Check(nameobj)) {
3580 PyObject* res;
3581 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3582 res = self->entity;
3583 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3584 res = self->target;
3585 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3586 return PyUnicode_FromFormat(
3587 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003588 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003589 }
3590 else
3591 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003592
Alexander Belopolskye239d232010-12-08 23:31:48 +00003593 Py_INCREF(res);
3594 return res;
3595 }
3596 generic:
3597 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003598}
3599
Neal Norwitz227b5332006-03-22 09:28:35 +00003600static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003601 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003602 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003603 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003604 (destructor)xmlparser_dealloc, /* tp_dealloc */
3605 0, /* tp_print */
3606 0, /* tp_getattr */
3607 0, /* tp_setattr */
3608 0, /* tp_reserved */
3609 0, /* tp_repr */
3610 0, /* tp_as_number */
3611 0, /* tp_as_sequence */
3612 0, /* tp_as_mapping */
3613 0, /* tp_hash */
3614 0, /* tp_call */
3615 0, /* tp_str */
3616 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3617 0, /* tp_setattro */
3618 0, /* tp_as_buffer */
3619 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3620 /* tp_flags */
3621 0, /* tp_doc */
3622 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3623 (inquiry)xmlparser_gc_clear, /* tp_clear */
3624 0, /* tp_richcompare */
3625 0, /* tp_weaklistoffset */
3626 0, /* tp_iter */
3627 0, /* tp_iternext */
3628 xmlparser_methods, /* tp_methods */
3629 0, /* tp_members */
3630 0, /* tp_getset */
3631 0, /* tp_base */
3632 0, /* tp_dict */
3633 0, /* tp_descr_get */
3634 0, /* tp_descr_set */
3635 0, /* tp_dictoffset */
3636 (initproc)xmlparser_init, /* tp_init */
3637 PyType_GenericAlloc, /* tp_alloc */
3638 xmlparser_new, /* tp_new */
3639 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003640};
3641
3642#endif
3643
3644/* ==================================================================== */
3645/* python module interface */
3646
3647static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003648 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003649 {NULL, NULL}
3650};
3651
Martin v. Löwis1a214512008-06-11 05:26:20 +00003652
3653static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003654 PyModuleDef_HEAD_INIT,
3655 "_elementtree",
3656 NULL,
3657 -1,
3658 _functions,
3659 NULL,
3660 NULL,
3661 NULL,
3662 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003663};
3664
Neal Norwitzf6657e62006-12-28 04:47:50 +00003665PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003666PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003667{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003668 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003669
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003670 /* Initialize object types */
3671 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003672 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003673 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003674 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003675#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003676 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003677 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003678#endif
3679
Martin v. Löwis1a214512008-06-11 05:26:20 +00003680 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003681 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003682 return NULL;
3683
Eli Bendersky828efde2012-04-05 05:40:58 +03003684 if (!(temp = PyImport_ImportModule("copy")))
3685 return NULL;
3686 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3687 Py_XDECREF(temp);
3688
3689 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3690 return NULL;
3691
Eli Bendersky20d41742012-06-01 09:48:37 +03003692 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003693 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3694 if (expat_capi) {
3695 /* check that it's usable */
3696 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3697 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3698 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3699 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003700 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003701 PyErr_SetString(PyExc_ImportError,
3702 "pyexpat version is incompatible");
3703 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003704 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003705 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003706 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003707 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003708
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003709 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003710 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003711 );
3712 Py_INCREF(elementtree_parseerror_obj);
3713 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3714
Eli Bendersky092af1f2012-03-04 07:14:03 +02003715 Py_INCREF((PyObject *)&Element_Type);
3716 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3717
Eli Bendersky58d548d2012-05-29 15:45:16 +03003718 Py_INCREF((PyObject *)&TreeBuilder_Type);
3719 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3720
Eli Bendersky52467b12012-06-01 07:13:08 +03003721#if defined(USE_EXPAT)
3722 Py_INCREF((PyObject *)&XMLParser_Type);
3723 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3724#endif
3725
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003726 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003727}