/*
 * ElementTree
 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
 *
 * elementtree accelerator
 *
 * History:
 * 1999-06-20 fl  created (as part of sgmlop)
 * 2001-05-29 fl  effdom edition
 * 2003-02-27 fl  elementtree edition (alpha)
 * 2004-06-03 fl  updates for elementtree 1.2
 * 2005-01-05 fl  major optimization effort
 * 2005-01-11 fl  first public release (cElementTree 0.8)
 * 2005-01-12 fl  split element object into base and extras
 * 2005-01-13 fl  use tagged pointers for tail/text (cElementTree 0.9)
 * 2005-01-17 fl  added treebuilder close method
 * 2005-01-17 fl  fixed crash in getchildren
 * 2005-01-18 fl  removed observer api, added iterparse (cElementTree 0.9.3)
 * 2005-01-23 fl  revised iterparse api; added namespace event support (0.9.8)
 * 2005-01-26 fl  added VERSION module property (cElementTree 1.0)
 * 2005-01-28 fl  added remove method (1.0.1)
 * 2005-03-01 fl  added iselement function; fixed makeelement aliasing (1.0.2)
 * 2005-03-13 fl  export Comment and ProcessingInstruction/PI helpers
 * 2005-03-26 fl  added Comment and PI support to XMLParser
 * 2005-03-27 fl  event optimizations; complain about bogus events
 * 2005-08-08 fl  fixed read error handling in parse
 * 2005-08-11 fl  added runtime test for copy workaround (1.0.3)
 * 2005-12-13 fl  added expat_capi support (for xml.etree) (1.0.4)
 * 2005-12-16 fl  added support for non-standard encodings
 * 2006-03-08 fl  fixed a couple of potential null-refs and leaks
 * 2006-03-12 fl  merge in 2.5 ssize_t changes
 * 2007-08-25 fl  call custom builder's close method from XMLParser
 * 2007-08-31 fl  added iter, extend from ET 1.3
 * 2007-09-01 fl  fixed ParseError exception, setslice source type, etc
 * 2007-09-03 fl  fixed handling of negative insert indexes
 * 2007-09-04 fl  added itertext from ET 1.3
 * 2007-09-06 fl  added position attribute to ParseError exception
 * 2008-06-06 fl  delay error reporting in iterparse (from Hrvoje Niksic)
 *
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 */

/* Licensed to PSF under a Contributor Agreement. */
/* See http://www.python.org/psf/license for licensing details. */

Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000053#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054
55/* -------------------------------------------------------------------- */
56/* configuration */
57
58/* Leave defined to include the expat-based XMLParser type */
59#define USE_EXPAT
60
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* An element can hold this many children without extra memory
62 allocations. */
63#define STATIC_CHILDREN 4
64
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
74
75/* -------------------------------------------------------------------- */
76
/* Allocation tracing hooks.  With the "#if 0" branch enabled, every
   ALLOC/RELEASE adjusts a running byte counter and prints it together
   with the comment string; as shipped, both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
87
/* compiler tweaks: LOCAL(type) introduces a file-private function
   returning `type`; under MSVC it additionally asks for inlining and
   the __fastcall convention */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
94
/* The text and tail slots of an element are tagged pointers: bit 0 of
   the stored address carries a 'join' flag.  Every use of text or tail
   as an object pointer must therefore be wrapped in JOIN_OBJ.  See the
   comments in the ElementObject definition for more info.

     JOIN_OBJ(p)        object pointer with the flag bit masked off
     JOIN_GET(p)        the flag bit (0 or 1)
     JOIN_SET(p, flag)  object pointer of p combined with `flag` */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000102
103/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000104static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000105static PyObject* elementtree_deepcopy_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000106static PyObject* elementpath_obj;
107
108/* helpers */
109
110LOCAL(PyObject*)
111deepcopy(PyObject* object, PyObject* memo)
112{
113 /* do a deep copy of the given object */
114
115 PyObject* args;
116 PyObject* result;
117
118 if (!elementtree_deepcopy_obj) {
119 PyErr_SetString(
120 PyExc_RuntimeError,
121 "deepcopy helper not found"
122 );
123 return NULL;
124 }
125
Antoine Pitrouc1948842012-10-01 23:40:37 +0200126 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000127 if (!args)
128 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000129 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000131 return result;
132}
133
134LOCAL(PyObject*)
135list_join(PyObject* list)
136{
137 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 PyObject* result;
140
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 if (!joiner)
143 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200144 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200146 if (result)
147 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 return result;
149}
150
Eli Bendersky48d358b2012-05-30 17:57:50 +0300151/* Is the given object an empty dictionary?
152*/
153static int
154is_empty_dict(PyObject *obj)
155{
156 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
157}
158
159
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200161/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162
163typedef struct {
164
165 /* attributes (a dictionary object), or None if no attributes */
166 PyObject* attrib;
167
168 /* child elements */
169 int length; /* actual number of items */
170 int allocated; /* allocated items */
171
172 /* this either points to _children or to a malloced buffer */
173 PyObject* *children;
174
175 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100176
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000177} ElementObjectExtra;
178
179typedef struct {
180 PyObject_HEAD
181
182 /* element tag (a string). */
183 PyObject* tag;
184
185 /* text before first child. note that this is a tagged pointer;
186 use JOIN_OBJ to get the object pointer. the join flag is used
187 to distinguish lists created by the tree builder from lists
188 assigned to the attribute by application code; the former
189 should be joined before being returned to the user, the latter
190 should be left intact. */
191 PyObject* text;
192
193 /* text after this element, in parent. note that this is a tagged
194 pointer; use JOIN_OBJ to get the object pointer. */
195 PyObject* tail;
196
197 ElementObjectExtra* extra;
198
Eli Benderskyebf37a22012-04-03 22:02:37 +0300199 PyObject *weakreflist; /* For tp_weaklistoffset */
200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000201} ElementObject;
202
Neal Norwitz227b5332006-03-22 09:28:35 +0000203static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000204
Christian Heimes90aa7642007-12-19 02:45:37 +0000205#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000206
207/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200208/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212{
213 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
214 if (!self->extra)
215 return -1;
216
217 if (!attrib)
218 attrib = Py_None;
219
220 Py_INCREF(attrib);
221 self->extra->attrib = attrib;
222
223 self->extra->length = 0;
224 self->extra->allocated = STATIC_CHILDREN;
225 self->extra->children = self->extra->_children;
226
227 return 0;
228}
229
230LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
Eli Bendersky08b85292012-04-04 15:55:07 +0300233 ElementObjectExtra *myextra;
234 int i;
235
Eli Benderskyebf37a22012-04-03 22:02:37 +0300236 if (!self->extra)
237 return;
238
239 /* Avoid DECREFs calling into this code again (cycles, etc.)
240 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300241 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300242 self->extra = NULL;
243
244 Py_DECREF(myextra->attrib);
245
Eli Benderskyebf37a22012-04-03 22:02:37 +0300246 for (i = 0; i < myextra->length; i++)
247 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000248
Eli Benderskyebf37a22012-04-03 22:02:37 +0300249 if (myextra->children != myextra->_children)
250 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251
Eli Benderskyebf37a22012-04-03 22:02:37 +0300252 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000253}
254
Eli Bendersky092af1f2012-03-04 07:14:03 +0200255/* Convenience internal function to create new Element objects with the given
256 * tag and attributes.
257*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200259create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000260{
261 ElementObject* self;
262
Eli Bendersky0192ba32012-03-30 16:38:33 +0300263 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 if (self == NULL)
265 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 self->extra = NULL;
267
Eli Bendersky48d358b2012-05-30 17:57:50 +0300268 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200269 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000270 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000272 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273 }
274
275 Py_INCREF(tag);
276 self->tag = tag;
277
278 Py_INCREF(Py_None);
279 self->text = Py_None;
280
281 Py_INCREF(Py_None);
282 self->tail = Py_None;
283
Eli Benderskyebf37a22012-04-03 22:02:37 +0300284 self->weakreflist = NULL;
285
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300287 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 return (PyObject*) self;
289}
290
Eli Bendersky092af1f2012-03-04 07:14:03 +0200291static PyObject *
292element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
293{
294 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
295 if (e != NULL) {
296 Py_INCREF(Py_None);
297 e->tag = Py_None;
298
299 Py_INCREF(Py_None);
300 e->text = Py_None;
301
302 Py_INCREF(Py_None);
303 e->tail = Py_None;
304
305 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300306 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307 }
308 return (PyObject *)e;
309}
310
Eli Bendersky737b1732012-05-29 06:02:56 +0300311/* Helper function for extracting the attrib dictionary from a keywords dict.
312 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800313 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300314 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700315 *
316 * Return a dictionary with the content of kwds merged into the content of
317 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300318 */
319static PyObject*
320get_attrib_from_keywords(PyObject *kwds)
321{
322 PyObject *attrib_str = PyUnicode_FromString("attrib");
323 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
324
325 if (attrib) {
326 /* If attrib was found in kwds, copy its value and remove it from
327 * kwds
328 */
329 if (!PyDict_Check(attrib)) {
330 Py_DECREF(attrib_str);
331 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
332 Py_TYPE(attrib)->tp_name);
333 return NULL;
334 }
335 attrib = PyDict_Copy(attrib);
336 PyDict_DelItem(kwds, attrib_str);
337 } else {
338 attrib = PyDict_New();
339 }
340
341 Py_DECREF(attrib_str);
342
343 if (attrib)
344 PyDict_Update(attrib, kwds);
345 return attrib;
346}
347
Eli Bendersky092af1f2012-03-04 07:14:03 +0200348static int
349element_init(PyObject *self, PyObject *args, PyObject *kwds)
350{
351 PyObject *tag;
352 PyObject *tmp;
353 PyObject *attrib = NULL;
354 ElementObject *self_elem;
355
356 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
357 return -1;
358
Eli Bendersky737b1732012-05-29 06:02:56 +0300359 if (attrib) {
360 /* attrib passed as positional arg */
361 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200362 if (!attrib)
363 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300364 if (kwds) {
365 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200366 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300367 return -1;
368 }
369 }
370 } else if (kwds) {
371 /* have keywords args */
372 attrib = get_attrib_from_keywords(kwds);
373 if (!attrib)
374 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200375 }
376
377 self_elem = (ElementObject *)self;
378
Antoine Pitrouc1948842012-10-01 23:40:37 +0200379 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200380 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200381 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200382 return -1;
383 }
384 }
385
Eli Bendersky48d358b2012-05-30 17:57:50 +0300386 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200387 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200388
389 /* Replace the objects already pointed to by tag, text and tail. */
390 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200392 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200393 Py_DECREF(tmp);
394
395 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200396 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200397 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200398 Py_DECREF(JOIN_OBJ(tmp));
399
400 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200402 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 Py_DECREF(JOIN_OBJ(tmp));
404
405 return 0;
406}
407
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000408LOCAL(int)
409element_resize(ElementObject* self, int extra)
410{
411 int size;
412 PyObject* *children;
413
414 /* make sure self->children can hold the given number of extra
415 elements. set an exception and return -1 if allocation failed */
416
417 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200418 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419
420 size = self->extra->length + extra;
421
422 if (size > self->extra->allocated) {
423 /* use Python 2.4's list growth strategy */
424 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000425 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100426 * which needs at least 4 bytes.
427 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000428 * be safe.
429 */
430 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000431 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000432 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100433 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000434 * false alarm always assume at least one child to be safe.
435 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000436 children = PyObject_Realloc(self->extra->children,
437 size * sizeof(PyObject*));
438 if (!children)
439 goto nomemory;
440 } else {
441 children = PyObject_Malloc(size * sizeof(PyObject*));
442 if (!children)
443 goto nomemory;
444 /* copy existing children from static area to malloc buffer */
445 memcpy(children, self->extra->children,
446 self->extra->length * sizeof(PyObject*));
447 }
448 self->extra->children = children;
449 self->extra->allocated = size;
450 }
451
452 return 0;
453
454 nomemory:
455 PyErr_NoMemory();
456 return -1;
457}
458
459LOCAL(int)
460element_add_subelement(ElementObject* self, PyObject* element)
461{
462 /* add a child element to a parent */
463
464 if (element_resize(self, 1) < 0)
465 return -1;
466
467 Py_INCREF(element);
468 self->extra->children[self->extra->length] = element;
469
470 self->extra->length++;
471
472 return 0;
473}
474
475LOCAL(PyObject*)
476element_get_attrib(ElementObject* self)
477{
478 /* return borrowed reference to attrib dictionary */
479 /* note: this function assumes that the extra section exists */
480
481 PyObject* res = self->extra->attrib;
482
483 if (res == Py_None) {
484 /* create missing dictionary */
485 res = PyDict_New();
486 if (!res)
487 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200488 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000489 self->extra->attrib = res;
490 }
491
492 return res;
493}
494
495LOCAL(PyObject*)
496element_get_text(ElementObject* self)
497{
498 /* return borrowed reference to text attribute */
499
500 PyObject* res = self->text;
501
502 if (JOIN_GET(res)) {
503 res = JOIN_OBJ(res);
504 if (PyList_CheckExact(res)) {
505 res = list_join(res);
506 if (!res)
507 return NULL;
508 self->text = res;
509 }
510 }
511
512 return res;
513}
514
515LOCAL(PyObject*)
516element_get_tail(ElementObject* self)
517{
518 /* return borrowed reference to text attribute */
519
520 PyObject* res = self->tail;
521
522 if (JOIN_GET(res)) {
523 res = JOIN_OBJ(res);
524 if (PyList_CheckExact(res)) {
525 res = list_join(res);
526 if (!res)
527 return NULL;
528 self->tail = res;
529 }
530 }
531
532 return res;
533}
534
535static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300536subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000537{
538 PyObject* elem;
539
540 ElementObject* parent;
541 PyObject* tag;
542 PyObject* attrib = NULL;
543 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
544 &Element_Type, &parent, &tag,
545 &PyDict_Type, &attrib))
546 return NULL;
547
Eli Bendersky737b1732012-05-29 06:02:56 +0300548 if (attrib) {
549 /* attrib passed as positional arg */
550 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000551 if (!attrib)
552 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300553 if (kwds) {
554 if (PyDict_Update(attrib, kwds) < 0) {
555 return NULL;
556 }
557 }
558 } else if (kwds) {
559 /* have keyword args */
560 attrib = get_attrib_from_keywords(kwds);
561 if (!attrib)
562 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300564 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000565 Py_INCREF(Py_None);
566 attrib = Py_None;
567 }
568
Eli Bendersky092af1f2012-03-04 07:14:03 +0200569 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000570
571 Py_DECREF(attrib);
572
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000573 if (element_add_subelement(parent, elem) < 0) {
574 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000575 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000576 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577
578 return elem;
579}
580
Eli Bendersky0192ba32012-03-30 16:38:33 +0300581static int
582element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
583{
584 Py_VISIT(self->tag);
585 Py_VISIT(JOIN_OBJ(self->text));
586 Py_VISIT(JOIN_OBJ(self->tail));
587
588 if (self->extra) {
589 int i;
590 Py_VISIT(self->extra->attrib);
591
592 for (i = 0; i < self->extra->length; ++i)
593 Py_VISIT(self->extra->children[i]);
594 }
595 return 0;
596}
597
598static int
599element_gc_clear(ElementObject *self)
600{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300601 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300602
603 /* The following is like Py_CLEAR for self->text and self->tail, but
604 * written explicitily because the real pointers hide behind access
605 * macros.
606 */
607 if (self->text) {
608 PyObject *tmp = JOIN_OBJ(self->text);
609 self->text = NULL;
610 Py_DECREF(tmp);
611 }
612
613 if (self->tail) {
614 PyObject *tmp = JOIN_OBJ(self->tail);
615 self->tail = NULL;
616 Py_DECREF(tmp);
617 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300618
619 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300620 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300621 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300622 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300623 return 0;
624}
625
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000626static void
627element_dealloc(ElementObject* self)
628{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300629 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300630
631 if (self->weakreflist != NULL)
632 PyObject_ClearWeakRefs((PyObject *) self);
633
Eli Bendersky0192ba32012-03-30 16:38:33 +0300634 /* element_gc_clear clears all references and deallocates extra
635 */
636 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000637
638 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200639 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000640}
641
642/* -------------------------------------------------------------------- */
643/* methods (in alphabetical order) */
644
645static PyObject*
646element_append(ElementObject* self, PyObject* args)
647{
648 PyObject* element;
649 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
650 return NULL;
651
652 if (element_add_subelement(self, element) < 0)
653 return NULL;
654
655 Py_RETURN_NONE;
656}
657
658static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000660{
661 if (!PyArg_ParseTuple(args, ":clear"))
662 return NULL;
663
Eli Benderskyebf37a22012-04-03 22:02:37 +0300664 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665
666 Py_INCREF(Py_None);
667 Py_DECREF(JOIN_OBJ(self->text));
668 self->text = Py_None;
669
670 Py_INCREF(Py_None);
671 Py_DECREF(JOIN_OBJ(self->tail));
672 self->tail = Py_None;
673
674 Py_RETURN_NONE;
675}
676
677static PyObject*
678element_copy(ElementObject* self, PyObject* args)
679{
680 int i;
681 ElementObject* element;
682
683 if (!PyArg_ParseTuple(args, ":__copy__"))
684 return NULL;
685
Eli Bendersky092af1f2012-03-04 07:14:03 +0200686 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000687 self->tag, (self->extra) ? self->extra->attrib : Py_None
688 );
689 if (!element)
690 return NULL;
691
692 Py_DECREF(JOIN_OBJ(element->text));
693 element->text = self->text;
694 Py_INCREF(JOIN_OBJ(element->text));
695
696 Py_DECREF(JOIN_OBJ(element->tail));
697 element->tail = self->tail;
698 Py_INCREF(JOIN_OBJ(element->tail));
699
700 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100701
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000702 if (element_resize(element, self->extra->length) < 0) {
703 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000705 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000706
707 for (i = 0; i < self->extra->length; i++) {
708 Py_INCREF(self->extra->children[i]);
709 element->extra->children[i] = self->extra->children[i];
710 }
711
712 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100713
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000714 }
715
716 return (PyObject*) element;
717}
718
719static PyObject*
720element_deepcopy(ElementObject* self, PyObject* args)
721{
722 int i;
723 ElementObject* element;
724 PyObject* tag;
725 PyObject* attrib;
726 PyObject* text;
727 PyObject* tail;
728 PyObject* id;
729
730 PyObject* memo;
731 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
732 return NULL;
733
734 tag = deepcopy(self->tag, memo);
735 if (!tag)
736 return NULL;
737
738 if (self->extra) {
739 attrib = deepcopy(self->extra->attrib, memo);
740 if (!attrib) {
741 Py_DECREF(tag);
742 return NULL;
743 }
744 } else {
745 Py_INCREF(Py_None);
746 attrib = Py_None;
747 }
748
Eli Bendersky092af1f2012-03-04 07:14:03 +0200749 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750
751 Py_DECREF(tag);
752 Py_DECREF(attrib);
753
754 if (!element)
755 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100756
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000757 text = deepcopy(JOIN_OBJ(self->text), memo);
758 if (!text)
759 goto error;
760 Py_DECREF(element->text);
761 element->text = JOIN_SET(text, JOIN_GET(self->text));
762
763 tail = deepcopy(JOIN_OBJ(self->tail), memo);
764 if (!tail)
765 goto error;
766 Py_DECREF(element->tail);
767 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
768
769 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 if (element_resize(element, self->extra->length) < 0)
772 goto error;
773
774 for (i = 0; i < self->extra->length; i++) {
775 PyObject* child = deepcopy(self->extra->children[i], memo);
776 if (!child) {
777 element->extra->length = i;
778 goto error;
779 }
780 element->extra->children[i] = child;
781 }
782
783 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100784
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000785 }
786
787 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200788 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000789 if (!id)
790 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000791
792 i = PyDict_SetItem(memo, id, (PyObject*) element);
793
794 Py_DECREF(id);
795
796 if (i < 0)
797 goto error;
798
799 return (PyObject*) element;
800
801 error:
802 Py_DECREF(element);
803 return NULL;
804}
805
Martin v. Löwisbce16662012-06-17 10:41:22 +0200806static PyObject*
807element_sizeof(PyObject* _self, PyObject* args)
808{
809 ElementObject *self = (ElementObject*)_self;
810 Py_ssize_t result = sizeof(ElementObject);
811 if (self->extra) {
812 result += sizeof(ElementObjectExtra);
813 if (self->extra->children != self->extra->_children)
814 result += sizeof(PyObject*) * self->extra->allocated;
815 }
816 return PyLong_FromSsize_t(result);
817}
818
/* Keys of the state dictionary exchanged by __getstate__ and
 * __setstate__.
 */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
825
826/* __getstate__ returns a fabricated instance dict as in the pure-Python
827 * Element implementation, for interoperability/interchangeability. This
828 * makes the pure-Python implementation details an API, but (a) there aren't
829 * any unnecessary structures there; and (b) it buys compatibility with 3.2
830 * pickles. See issue #16076.
831 */
832static PyObject *
833element_getstate(ElementObject *self)
834{
835 int i, noattrib;
836 PyObject *instancedict = NULL, *children;
837
838 /* Build a list of children. */
839 children = PyList_New(self->extra ? self->extra->length : 0);
840 if (!children)
841 return NULL;
842 for (i = 0; i < PyList_GET_SIZE(children); i++) {
843 PyObject *child = self->extra->children[i];
844 Py_INCREF(child);
845 PyList_SET_ITEM(children, i, child);
846 }
847
848 /* Construct the state object. */
849 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
850 if (noattrib)
851 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
852 PICKLED_TAG, self->tag,
853 PICKLED_CHILDREN, children,
854 PICKLED_ATTRIB,
855 PICKLED_TEXT, self->text,
856 PICKLED_TAIL, self->tail);
857 else
858 instancedict = Py_BuildValue("{sOsOsOsOsO}",
859 PICKLED_TAG, self->tag,
860 PICKLED_CHILDREN, children,
861 PICKLED_ATTRIB, self->extra->attrib,
862 PICKLED_TEXT, self->text,
863 PICKLED_TAIL, self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800864 if (instancedict) {
865 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800866 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800867 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800868 else {
869 for (i = 0; i < PyList_GET_SIZE(children); i++)
870 Py_DECREF(PyList_GET_ITEM(children, i));
871 Py_DECREF(children);
872
873 return NULL;
874 }
875}
876
877static PyObject *
878element_setstate_from_attributes(ElementObject *self,
879 PyObject *tag,
880 PyObject *attrib,
881 PyObject *text,
882 PyObject *tail,
883 PyObject *children)
884{
885 Py_ssize_t i, nchildren;
886
887 if (!tag) {
888 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
889 return NULL;
890 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800891
892 Py_CLEAR(self->tag);
893 self->tag = tag;
894 Py_INCREF(self->tag);
895
896 Py_CLEAR(self->text);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800897 self->text = text ? text : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800898 Py_INCREF(self->text);
899
900 Py_CLEAR(self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800901 self->tail = tail ? tail : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800902 Py_INCREF(self->tail);
903
904 /* Handle ATTRIB and CHILDREN. */
905 if (!children && !attrib)
906 Py_RETURN_NONE;
907
908 /* Compute 'nchildren'. */
909 if (children) {
910 if (!PyList_Check(children)) {
911 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
912 return NULL;
913 }
914 nchildren = PyList_Size(children);
915 }
916 else {
917 nchildren = 0;
918 }
919
920 /* Allocate 'extra'. */
921 if (element_resize(self, nchildren)) {
922 return NULL;
923 }
924 assert(self->extra && self->extra->allocated >= nchildren);
925
926 /* Copy children */
927 for (i = 0; i < nchildren; i++) {
928 self->extra->children[i] = PyList_GET_ITEM(children, i);
929 Py_INCREF(self->extra->children[i]);
930 }
931
932 self->extra->length = nchildren;
933 self->extra->allocated = nchildren;
934
935 /* Stash attrib. */
936 if (attrib) {
937 Py_CLEAR(self->extra->attrib);
938 self->extra->attrib = attrib;
939 Py_INCREF(attrib);
940 }
941
942 Py_RETURN_NONE;
943}
944
945/* __setstate__ for Element instance from the Python implementation.
946 * 'state' should be the instance dict.
947 */
948static PyObject *
949element_setstate_from_Python(ElementObject *self, PyObject *state)
950{
951 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
952 PICKLED_TAIL, PICKLED_CHILDREN, 0};
953 PyObject *args;
954 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800955 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800956
Eli Bendersky698bdb22013-01-10 06:01:06 -0800957 tag = attrib = text = tail = children = NULL;
958 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800959 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800960 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800961
962 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
963 &attrib, &text, &tail, &children))
964 retval = element_setstate_from_attributes(self, tag, attrib, text,
965 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800966 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800967 retval = NULL;
968
969 Py_DECREF(args);
970 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800971}
972
973static PyObject *
974element_setstate(ElementObject *self, PyObject *state)
975{
976 if (!PyDict_CheckExact(state)) {
977 PyErr_Format(PyExc_TypeError,
978 "Don't know how to unpickle \"%.200R\" as an Element",
979 state);
980 return NULL;
981 }
982 else
983 return element_setstate_from_Python(self, state);
984}
985
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000986LOCAL(int)
987checkpath(PyObject* tag)
988{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000989 Py_ssize_t i;
990 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000991
992 /* check if a tag contains an xpath character */
993
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000994#define PATHCHAR(ch) \
995 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000996
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000997 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200998 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
999 void *data = PyUnicode_DATA(tag);
1000 unsigned int kind = PyUnicode_KIND(tag);
1001 for (i = 0; i < len; i++) {
1002 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1003 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001004 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001006 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001007 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001008 return 1;
1009 }
1010 return 0;
1011 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001012 if (PyBytes_Check(tag)) {
1013 char *p = PyBytes_AS_STRING(tag);
1014 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001015 if (p[i] == '{')
1016 check = 0;
1017 else if (p[i] == '}')
1018 check = 1;
1019 else if (check && PATHCHAR(p[i]))
1020 return 1;
1021 }
1022 return 0;
1023 }
1024
1025 return 1; /* unknown type; might be path expression */
1026}
1027
1028static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001029element_extend(ElementObject* self, PyObject* args)
1030{
1031 PyObject* seq;
1032 Py_ssize_t i, seqlen = 0;
1033
1034 PyObject* seq_in;
1035 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1036 return NULL;
1037
1038 seq = PySequence_Fast(seq_in, "");
1039 if (!seq) {
1040 PyErr_Format(
1041 PyExc_TypeError,
1042 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1043 );
1044 return NULL;
1045 }
1046
1047 seqlen = PySequence_Size(seq);
1048 for (i = 0; i < seqlen; i++) {
1049 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001050 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1051 Py_DECREF(seq);
1052 PyErr_Format(
1053 PyExc_TypeError,
1054 "expected an Element, not \"%.200s\"",
1055 Py_TYPE(element)->tp_name);
1056 return NULL;
1057 }
1058
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001059 if (element_add_subelement(self, element) < 0) {
1060 Py_DECREF(seq);
1061 return NULL;
1062 }
1063 }
1064
1065 Py_DECREF(seq);
1066
1067 Py_RETURN_NONE;
1068}
1069
1070static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001071element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001072{
1073 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001074 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001075 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001076 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001077
Eli Bendersky737b1732012-05-29 06:02:56 +03001078 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1079 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001080 return NULL;
1081
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001082 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001083 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001084 return _PyObject_CallMethodId(
1085 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001087 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088
1089 if (!self->extra)
1090 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001091
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001092 for (i = 0; i < self->extra->length; i++) {
1093 PyObject* item = self->extra->children[i];
1094 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001095 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001096 Py_INCREF(item);
1097 return item;
1098 }
1099 }
1100
1101 Py_RETURN_NONE;
1102}
1103
1104static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001105element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001106{
1107 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001108 PyObject* tag;
1109 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001110 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001111 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001112 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001113
Eli Bendersky737b1732012-05-29 06:02:56 +03001114 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1115 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001116 return NULL;
1117
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001118 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001119 return _PyObject_CallMethodId(
1120 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001121 );
1122
1123 if (!self->extra) {
1124 Py_INCREF(default_value);
1125 return default_value;
1126 }
1127
1128 for (i = 0; i < self->extra->length; i++) {
1129 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +00001130 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
1131
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001132 PyObject* text = element_get_text(item);
1133 if (text == Py_None)
Eli Bendersky25771b32013-01-13 05:26:07 -08001134 return PyUnicode_New(0, 0);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001135 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001136 return text;
1137 }
1138 }
1139
1140 Py_INCREF(default_value);
1141 return default_value;
1142}
1143
1144static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001145element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001146{
1147 int i;
1148 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001149 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001150 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001151 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001152
Eli Bendersky737b1732012-05-29 06:02:56 +03001153 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1154 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001155 return NULL;
1156
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001157 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001158 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001159 return _PyObject_CallMethodId(
1160 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001161 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001162 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001163
1164 out = PyList_New(0);
1165 if (!out)
1166 return NULL;
1167
1168 if (!self->extra)
1169 return out;
1170
1171 for (i = 0; i < self->extra->length; i++) {
1172 PyObject* item = self->extra->children[i];
1173 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001174 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001175 if (PyList_Append(out, item) < 0) {
1176 Py_DECREF(out);
1177 return NULL;
1178 }
1179 }
1180 }
1181
1182 return out;
1183}
1184
1185static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001186element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001187{
1188 PyObject* tag;
1189 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001190 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001191 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001192
Eli Bendersky737b1732012-05-29 06:02:56 +03001193 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1194 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001195 return NULL;
1196
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001197 return _PyObject_CallMethodId(
1198 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001199 );
1200}
1201
1202static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001203element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001204{
1205 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001206 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001207
1208 PyObject* key;
1209 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001210
1211 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1212 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001213 return NULL;
1214
1215 if (!self->extra || self->extra->attrib == Py_None)
1216 value = default_value;
1217 else {
1218 value = PyDict_GetItem(self->extra->attrib, key);
1219 if (!value)
1220 value = default_value;
1221 }
1222
1223 Py_INCREF(value);
1224 return value;
1225}
1226
1227static PyObject*
1228element_getchildren(ElementObject* self, PyObject* args)
1229{
1230 int i;
1231 PyObject* list;
1232
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001233 /* FIXME: report as deprecated? */
1234
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001235 if (!PyArg_ParseTuple(args, ":getchildren"))
1236 return NULL;
1237
1238 if (!self->extra)
1239 return PyList_New(0);
1240
1241 list = PyList_New(self->extra->length);
1242 if (!list)
1243 return NULL;
1244
1245 for (i = 0; i < self->extra->length; i++) {
1246 PyObject* item = self->extra->children[i];
1247 Py_INCREF(item);
1248 PyList_SET_ITEM(list, i, item);
1249 }
1250
1251 return list;
1252}
1253
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001254
Eli Bendersky64d11e62012-06-15 07:42:50 +03001255static PyObject *
1256create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1257
1258
1259static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001260element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001261{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001262 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001263 static char* kwlist[] = {"tag", 0};
1264
1265 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001266 return NULL;
1267
Eli Bendersky64d11e62012-06-15 07:42:50 +03001268 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001269}
1270
1271
1272static PyObject*
1273element_itertext(ElementObject* self, PyObject* args)
1274{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001275 if (!PyArg_ParseTuple(args, ":itertext"))
1276 return NULL;
1277
Eli Bendersky64d11e62012-06-15 07:42:50 +03001278 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001279}
1280
Eli Bendersky64d11e62012-06-15 07:42:50 +03001281
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001283element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001284{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001285 ElementObject* self = (ElementObject*) self_;
1286
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001287 if (!self->extra || index < 0 || index >= self->extra->length) {
1288 PyErr_SetString(
1289 PyExc_IndexError,
1290 "child index out of range"
1291 );
1292 return NULL;
1293 }
1294
1295 Py_INCREF(self->extra->children[index]);
1296 return self->extra->children[index];
1297}
1298
1299static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001300element_insert(ElementObject* self, PyObject* args)
1301{
1302 int i;
1303
1304 int index;
1305 PyObject* element;
1306 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1307 &Element_Type, &element))
1308 return NULL;
1309
1310 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001311 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001312
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001313 if (index < 0) {
1314 index += self->extra->length;
1315 if (index < 0)
1316 index = 0;
1317 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001318 if (index > self->extra->length)
1319 index = self->extra->length;
1320
1321 if (element_resize(self, 1) < 0)
1322 return NULL;
1323
1324 for (i = self->extra->length; i > index; i--)
1325 self->extra->children[i] = self->extra->children[i-1];
1326
1327 Py_INCREF(element);
1328 self->extra->children[index] = element;
1329
1330 self->extra->length++;
1331
1332 Py_RETURN_NONE;
1333}
1334
1335static PyObject*
1336element_items(ElementObject* self, PyObject* args)
1337{
1338 if (!PyArg_ParseTuple(args, ":items"))
1339 return NULL;
1340
1341 if (!self->extra || self->extra->attrib == Py_None)
1342 return PyList_New(0);
1343
1344 return PyDict_Items(self->extra->attrib);
1345}
1346
1347static PyObject*
1348element_keys(ElementObject* self, PyObject* args)
1349{
1350 if (!PyArg_ParseTuple(args, ":keys"))
1351 return NULL;
1352
1353 if (!self->extra || self->extra->attrib == Py_None)
1354 return PyList_New(0);
1355
1356 return PyDict_Keys(self->extra->attrib);
1357}
1358
Martin v. Löwis18e16552006-02-15 17:27:45 +00001359static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001360element_length(ElementObject* self)
1361{
1362 if (!self->extra)
1363 return 0;
1364
1365 return self->extra->length;
1366}
1367
1368static PyObject*
1369element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1370{
1371 PyObject* elem;
1372
1373 PyObject* tag;
1374 PyObject* attrib;
1375 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1376 return NULL;
1377
1378 attrib = PyDict_Copy(attrib);
1379 if (!attrib)
1380 return NULL;
1381
Eli Bendersky092af1f2012-03-04 07:14:03 +02001382 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001383
1384 Py_DECREF(attrib);
1385
1386 return elem;
1387}
1388
1389static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001390element_remove(ElementObject* self, PyObject* args)
1391{
1392 int i;
1393
1394 PyObject* element;
1395 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1396 return NULL;
1397
1398 if (!self->extra) {
1399 /* element has no children, so raise exception */
1400 PyErr_SetString(
1401 PyExc_ValueError,
1402 "list.remove(x): x not in list"
1403 );
1404 return NULL;
1405 }
1406
1407 for (i = 0; i < self->extra->length; i++) {
1408 if (self->extra->children[i] == element)
1409 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001410 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001411 break;
1412 }
1413
1414 if (i == self->extra->length) {
1415 /* element is not in children, so raise exception */
1416 PyErr_SetString(
1417 PyExc_ValueError,
1418 "list.remove(x): x not in list"
1419 );
1420 return NULL;
1421 }
1422
1423 Py_DECREF(self->extra->children[i]);
1424
1425 self->extra->length--;
1426
1427 for (; i < self->extra->length; i++)
1428 self->extra->children[i] = self->extra->children[i+1];
1429
1430 Py_RETURN_NONE;
1431}
1432
1433static PyObject*
1434element_repr(ElementObject* self)
1435{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001436 if (self->tag)
1437 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1438 else
1439 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001440}
1441
1442static PyObject*
1443element_set(ElementObject* self, PyObject* args)
1444{
1445 PyObject* attrib;
1446
1447 PyObject* key;
1448 PyObject* value;
1449 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1450 return NULL;
1451
1452 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001453 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001454
1455 attrib = element_get_attrib(self);
1456 if (!attrib)
1457 return NULL;
1458
1459 if (PyDict_SetItem(attrib, key, value) < 0)
1460 return NULL;
1461
1462 Py_RETURN_NONE;
1463}
1464
1465static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001466element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001467{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001468 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001469 int i;
1470 PyObject* old;
1471
1472 if (!self->extra || index < 0 || index >= self->extra->length) {
1473 PyErr_SetString(
1474 PyExc_IndexError,
1475 "child assignment index out of range");
1476 return -1;
1477 }
1478
1479 old = self->extra->children[index];
1480
1481 if (item) {
1482 Py_INCREF(item);
1483 self->extra->children[index] = item;
1484 } else {
1485 self->extra->length--;
1486 for (i = index; i < self->extra->length; i++)
1487 self->extra->children[i] = self->extra->children[i+1];
1488 }
1489
1490 Py_DECREF(old);
1491
1492 return 0;
1493}
1494
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001495static PyObject*
1496element_subscr(PyObject* self_, PyObject* item)
1497{
1498 ElementObject* self = (ElementObject*) self_;
1499
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001500 if (PyIndex_Check(item)) {
1501 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001502
1503 if (i == -1 && PyErr_Occurred()) {
1504 return NULL;
1505 }
1506 if (i < 0 && self->extra)
1507 i += self->extra->length;
1508 return element_getitem(self_, i);
1509 }
1510 else if (PySlice_Check(item)) {
1511 Py_ssize_t start, stop, step, slicelen, cur, i;
1512 PyObject* list;
1513
1514 if (!self->extra)
1515 return PyList_New(0);
1516
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001517 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001518 self->extra->length,
1519 &start, &stop, &step, &slicelen) < 0) {
1520 return NULL;
1521 }
1522
1523 if (slicelen <= 0)
1524 return PyList_New(0);
1525 else {
1526 list = PyList_New(slicelen);
1527 if (!list)
1528 return NULL;
1529
1530 for (cur = start, i = 0; i < slicelen;
1531 cur += step, i++) {
1532 PyObject* item = self->extra->children[cur];
1533 Py_INCREF(item);
1534 PyList_SET_ITEM(list, i, item);
1535 }
1536
1537 return list;
1538 }
1539 }
1540 else {
1541 PyErr_SetString(PyExc_TypeError,
1542 "element indices must be integers");
1543 return NULL;
1544 }
1545}
1546
1547static int
1548element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1549{
1550 ElementObject* self = (ElementObject*) self_;
1551
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001552 if (PyIndex_Check(item)) {
1553 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001554
1555 if (i == -1 && PyErr_Occurred()) {
1556 return -1;
1557 }
1558 if (i < 0 && self->extra)
1559 i += self->extra->length;
1560 return element_setitem(self_, i, value);
1561 }
1562 else if (PySlice_Check(item)) {
1563 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1564
1565 PyObject* recycle = NULL;
1566 PyObject* seq = NULL;
1567
1568 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001569 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001570
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001571 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001572 self->extra->length,
1573 &start, &stop, &step, &slicelen) < 0) {
1574 return -1;
1575 }
1576
Eli Bendersky865756a2012-03-09 13:38:15 +02001577 if (value == NULL) {
1578 /* Delete slice */
1579 size_t cur;
1580 Py_ssize_t i;
1581
1582 if (slicelen <= 0)
1583 return 0;
1584
1585 /* Since we're deleting, the direction of the range doesn't matter,
1586 * so for simplicity make it always ascending.
1587 */
1588 if (step < 0) {
1589 stop = start + 1;
1590 start = stop + step * (slicelen - 1) - 1;
1591 step = -step;
1592 }
1593
1594 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1595
1596 /* recycle is a list that will contain all the children
1597 * scheduled for removal.
1598 */
1599 if (!(recycle = PyList_New(slicelen))) {
1600 PyErr_NoMemory();
1601 return -1;
1602 }
1603
1604 /* This loop walks over all the children that have to be deleted,
1605 * with cur pointing at them. num_moved is the amount of children
1606 * until the next deleted child that have to be "shifted down" to
1607 * occupy the deleted's places.
1608 * Note that in the ith iteration, shifting is done i+i places down
1609 * because i children were already removed.
1610 */
1611 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1612 /* Compute how many children have to be moved, clipping at the
1613 * list end.
1614 */
1615 Py_ssize_t num_moved = step - 1;
1616 if (cur + step >= (size_t)self->extra->length) {
1617 num_moved = self->extra->length - cur - 1;
1618 }
1619
1620 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1621
1622 memmove(
1623 self->extra->children + cur - i,
1624 self->extra->children + cur + 1,
1625 num_moved * sizeof(PyObject *));
1626 }
1627
1628 /* Leftover "tail" after the last removed child */
1629 cur = start + (size_t)slicelen * step;
1630 if (cur < (size_t)self->extra->length) {
1631 memmove(
1632 self->extra->children + cur - slicelen,
1633 self->extra->children + cur,
1634 (self->extra->length - cur) * sizeof(PyObject *));
1635 }
1636
1637 self->extra->length -= slicelen;
1638
1639 /* Discard the recycle list with all the deleted sub-elements */
1640 Py_XDECREF(recycle);
1641 return 0;
1642 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001643 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001644 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001645 seq = PySequence_Fast(value, "");
1646 if (!seq) {
1647 PyErr_Format(
1648 PyExc_TypeError,
1649 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1650 );
1651 return -1;
1652 }
1653 newlen = PySequence_Size(seq);
1654 }
1655
1656 if (step != 1 && newlen != slicelen)
1657 {
1658 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001659 "attempt to assign sequence of size %zd "
1660 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001661 newlen, slicelen
1662 );
1663 return -1;
1664 }
1665
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001666 /* Resize before creating the recycle bin, to prevent refleaks. */
1667 if (newlen > slicelen) {
1668 if (element_resize(self, newlen - slicelen) < 0) {
1669 if (seq) {
1670 Py_DECREF(seq);
1671 }
1672 return -1;
1673 }
1674 }
1675
1676 if (slicelen > 0) {
1677 /* to avoid recursive calls to this method (via decref), move
1678 old items to the recycle bin here, and get rid of them when
1679 we're done modifying the element */
1680 recycle = PyList_New(slicelen);
1681 if (!recycle) {
1682 if (seq) {
1683 Py_DECREF(seq);
1684 }
1685 return -1;
1686 }
1687 for (cur = start, i = 0; i < slicelen;
1688 cur += step, i++)
1689 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1690 }
1691
1692 if (newlen < slicelen) {
1693 /* delete slice */
1694 for (i = stop; i < self->extra->length; i++)
1695 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1696 } else if (newlen > slicelen) {
1697 /* insert slice */
1698 for (i = self->extra->length-1; i >= stop; i--)
1699 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1700 }
1701
1702 /* replace the slice */
1703 for (cur = start, i = 0; i < newlen;
1704 cur += step, i++) {
1705 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1706 Py_INCREF(element);
1707 self->extra->children[cur] = element;
1708 }
1709
1710 self->extra->length += newlen - slicelen;
1711
1712 if (seq) {
1713 Py_DECREF(seq);
1714 }
1715
1716 /* discard the recycle bin, and everything in it */
1717 Py_XDECREF(recycle);
1718
1719 return 0;
1720 }
1721 else {
1722 PyErr_SetString(PyExc_TypeError,
1723 "element indices must be integers");
1724 return -1;
1725 }
1726}
1727
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001728static PyMethodDef element_methods[] = {
1729
Eli Bendersky0192ba32012-03-30 16:38:33 +03001730 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001731
Eli Benderskya8736902013-01-05 06:26:39 -08001732 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001733 {"set", (PyCFunction) element_set, METH_VARARGS},
1734
Eli Bendersky737b1732012-05-29 06:02:56 +03001735 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1736 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1737 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001738
1739 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001740 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001741 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1742 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1743
Eli Benderskya8736902013-01-05 06:26:39 -08001744 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001745 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001746 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001747
Eli Benderskya8736902013-01-05 06:26:39 -08001748 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001749 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1750
1751 {"items", (PyCFunction) element_items, METH_VARARGS},
1752 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1753
1754 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1755
1756 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1757 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001758 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001759 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1760 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001761
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001762 {NULL, NULL}
1763};
1764
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001765static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001766element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001767{
1768 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001769 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001770
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001771 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001772 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001773
Alexander Belopolskye239d232010-12-08 23:31:48 +00001774 if (name == NULL)
1775 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001776
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001777 /* handle common attributes first */
1778 if (strcmp(name, "tag") == 0) {
1779 res = self->tag;
1780 Py_INCREF(res);
1781 return res;
1782 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001783 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001784 Py_INCREF(res);
1785 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001786 }
1787
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001788 /* methods */
1789 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1790 if (res)
1791 return res;
1792
1793 /* less common attributes */
1794 if (strcmp(name, "tail") == 0) {
1795 PyErr_Clear();
1796 res = element_get_tail(self);
1797 } else if (strcmp(name, "attrib") == 0) {
1798 PyErr_Clear();
1799 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001800 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001801 res = element_get_attrib(self);
1802 }
1803
1804 if (!res)
1805 return NULL;
1806
1807 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001808 return res;
1809}
1810
Eli Benderskyef9683b2013-05-18 07:52:34 -07001811static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001812element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001813{
Eli Benderskyb20df952012-05-20 06:33:29 +03001814 char *name = "";
1815 if (PyUnicode_Check(nameobj))
1816 name = _PyUnicode_AsString(nameobj);
1817
Eli Benderskyef9683b2013-05-18 07:52:34 -07001818 if (name == NULL) {
1819 return -1;
1820 } else if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001821 Py_DECREF(self->tag);
1822 self->tag = value;
1823 Py_INCREF(self->tag);
1824 } else if (strcmp(name, "text") == 0) {
1825 Py_DECREF(JOIN_OBJ(self->text));
1826 self->text = value;
1827 Py_INCREF(self->text);
1828 } else if (strcmp(name, "tail") == 0) {
1829 Py_DECREF(JOIN_OBJ(self->tail));
1830 self->tail = value;
1831 Py_INCREF(self->tail);
1832 } else if (strcmp(name, "attrib") == 0) {
1833 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001834 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001835 Py_DECREF(self->extra->attrib);
1836 self->extra->attrib = value;
1837 Py_INCREF(self->extra->attrib);
1838 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001839 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001840 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001841 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001842 }
1843
Eli Benderskyef9683b2013-05-18 07:52:34 -07001844 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001845}
1846
/* Sequence protocol for Element: length, item access and item assignment
 * over the element's children.  The table is positional, so unused slots
 * must stay in place as 0. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length,                   /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem,                            /* sq_item */
    0,                                          /* was_sq_slice */
    element_setitem,                            /* sq_ass_item */
    0,                                          /* was_sq_ass_slice */
};
1856
/* Mapping protocol for Element: length, subscript read and subscript
 * assignment, implemented by element_subscr / element_ass_subscr. */
static PyMappingMethods element_as_mapping = {
    (lenfunc) element_length,                   /* mp_length */
    (binaryfunc) element_subscr,                /* mp_subscript */
    (objobjargproc) element_ass_subscr,         /* mp_ass_subscript */
};
1862
/* Type object for Element.  The type can be subclassed
 * (Py_TPFLAGS_BASETYPE), takes part in cyclic garbage collection, supports
 * weak references, and routes attribute access through
 * element_getattro / element_setattro.  The initializer is positional:
 * every slot must stay in place. */
static PyTypeObject Element_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* tp_name, tp_basicsize, tp_itemsize */
    "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
    /* methods */
    (destructor)element_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)element_repr,                     /* tp_repr */
    0,                                          /* tp_as_number */
    &element_as_sequence,                       /* tp_as_sequence */
    &element_as_mapping,                        /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    (getattrofunc)element_getattro,             /* tp_getattro */
    (setattrofunc)element_setattro,             /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)element_gc_traverse,          /* tp_traverse */
    (inquiry)element_gc_clear,                  /* tp_clear */
    0,                                          /* tp_richcompare */
    offsetof(ElementObject, weakreflist),       /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    element_methods,                            /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    (initproc)element_init,                     /* tp_init */
    PyType_GenericAlloc,                        /* tp_alloc */
    element_new,                                /* tp_new */
    0,                                          /* tp_free */
};
1904
Eli Bendersky64d11e62012-06-15 07:42:50 +03001905/******************************* Element iterator ****************************/
1906
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed as a singly linked list starting at parent_stack.
 * Each stack node contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
typedef struct ParentLocator_t {
    ElementObject *parent;          /* owned reference; NULL only in the
                                       sentinel node at the stack bottom */
    Py_ssize_t child_index;         /* index of the next child of parent
                                       to visit */
    struct ParentLocator_t *next;   /* node below this one, or NULL */
} ParentLocator;
1920
/* Iterator object shared by Element.iter() and Element.itertext(). */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* top of the parent stack; the bottom
                                       node is a sentinel whose parent is
                                       NULL */
    ElementObject *root_element;    /* element the iteration started from
                                       (owned reference) */
    PyObject *sought_tag;           /* tag to match, or Py_None to match
                                       every element (owned reference) */
    int root_done;                  /* nonzero once the root has been pushed
                                       and considered */
    int gettext;                    /* nonzero: yield text/tail strings
                                       (itertext); zero: yield elements */
} ElementIterObject;
1929
1930
1931static void
1932elementiter_dealloc(ElementIterObject *it)
1933{
1934 ParentLocator *p = it->parent_stack;
1935 while (p) {
1936 ParentLocator *temp = p;
1937 Py_XDECREF(p->parent);
1938 p = p->next;
1939 PyObject_Free(temp);
1940 }
1941
1942 Py_XDECREF(it->sought_tag);
1943 Py_XDECREF(it->root_element);
1944
1945 PyObject_GC_UnTrack(it);
1946 PyObject_GC_Del(it);
1947}
1948
1949static int
1950elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1951{
1952 ParentLocator *p = it->parent_stack;
1953 while (p) {
1954 Py_VISIT(p->parent);
1955 p = p->next;
1956 }
1957
1958 Py_VISIT(it->root_element);
1959 Py_VISIT(it->sought_tag);
1960 return 0;
1961}
1962
1963/* Helper function for elementiter_next. Add a new parent to the parent stack.
1964 */
1965static ParentLocator *
1966parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1967{
1968 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1969 if (new_node) {
1970 new_node->parent = parent;
1971 Py_INCREF(parent);
1972 new_node->child_index = 0;
1973 new_node->next = stack;
1974 }
1975 return new_node;
1976}
1977
1978static PyObject *
1979elementiter_next(ElementIterObject *it)
1980{
1981 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08001982 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03001983 * A short note on gettext: this function serves both the iter() and
1984 * itertext() methods to avoid code duplication. However, there are a few
1985 * small differences in the way these iterations work. Namely:
1986 * - itertext() only yields text from nodes that have it, and continues
1987 * iterating when a node doesn't have text (so it doesn't return any
1988 * node like iter())
1989 * - itertext() also has to handle tail, after finishing with all the
1990 * children of a node.
1991 */
Eli Bendersky113da642012-06-15 07:52:49 +03001992 ElementObject *cur_parent;
1993 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001994
1995 while (1) {
1996 /* Handle the case reached in the beginning and end of iteration, where
1997 * the parent stack is empty. The root_done flag gives us indication
1998 * whether we've just started iterating (so root_done is 0), in which
1999 * case the root is returned. If root_done is 1 and we're here, the
2000 * iterator is exhausted.
2001 */
2002 if (!it->parent_stack->parent) {
2003 if (it->root_done) {
2004 PyErr_SetNone(PyExc_StopIteration);
2005 return NULL;
2006 } else {
2007 it->parent_stack = parent_stack_push_new(it->parent_stack,
2008 it->root_element);
2009 if (!it->parent_stack) {
2010 PyErr_NoMemory();
2011 return NULL;
2012 }
2013
2014 it->root_done = 1;
2015 if (it->sought_tag == Py_None ||
2016 PyObject_RichCompareBool(it->root_element->tag,
2017 it->sought_tag, Py_EQ) == 1) {
2018 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002019 PyObject *text = element_get_text(it->root_element);
2020 if (!text)
2021 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002022 if (PyObject_IsTrue(text)) {
2023 Py_INCREF(text);
2024 return text;
2025 }
2026 } else {
2027 Py_INCREF(it->root_element);
2028 return (PyObject *)it->root_element;
2029 }
2030 }
2031 }
2032 }
2033
2034 /* See if there are children left to traverse in the current parent. If
2035 * yes, visit the next child. If not, pop the stack and try again.
2036 */
Eli Bendersky113da642012-06-15 07:52:49 +03002037 cur_parent = it->parent_stack->parent;
2038 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002039 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2040 ElementObject *child = (ElementObject *)
2041 cur_parent->extra->children[child_index];
2042 it->parent_stack->child_index++;
2043 it->parent_stack = parent_stack_push_new(it->parent_stack,
2044 child);
2045 if (!it->parent_stack) {
2046 PyErr_NoMemory();
2047 return NULL;
2048 }
2049
2050 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002051 PyObject *text = element_get_text(child);
2052 if (!text)
2053 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002054 if (PyObject_IsTrue(text)) {
2055 Py_INCREF(text);
2056 return text;
2057 }
2058 } else if (it->sought_tag == Py_None ||
2059 PyObject_RichCompareBool(child->tag,
2060 it->sought_tag, Py_EQ) == 1) {
2061 Py_INCREF(child);
2062 return (PyObject *)child;
2063 }
2064 else
2065 continue;
2066 }
2067 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002068 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002069 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002070 if (it->gettext) {
2071 tail = element_get_tail(cur_parent);
2072 if (!tail)
2073 return NULL;
2074 }
2075 else
2076 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002077 Py_XDECREF(it->parent_stack->parent);
2078 PyObject_Free(it->parent_stack);
2079 it->parent_stack = next;
2080
2081 /* Note that extra condition on it->parent_stack->parent here;
2082 * this is because itertext() is supposed to only return *inner*
2083 * text, not text following the element it began iteration with.
2084 */
2085 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2086 Py_INCREF(tail);
2087 return tail;
2088 }
2089 }
2090 }
2091
2092 return NULL;
2093}
2094
2095
/* Type object for the iterator returned by create_elementiter().  It is a
 * GC-aware self-iterator: tp_iter returns the object itself and tp_iternext
 * is elementiter_next.  No tp_new is provided.  The initializer is
 * positional: every slot must stay in place. */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* Using the module's name since the pure-Python implementation does not
       have such a type. */
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
2139
2140
2141static PyObject *
2142create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2143{
2144 ElementIterObject *it;
2145 PyObject *star = NULL;
2146
2147 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2148 if (!it)
2149 return NULL;
2150 if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
2151 PyObject_GC_Del(it);
2152 return NULL;
2153 }
2154
2155 it->parent_stack->parent = NULL;
2156 it->parent_stack->child_index = 0;
2157 it->parent_stack->next = NULL;
2158
2159 if (PyUnicode_Check(tag))
2160 star = PyUnicode_FromString("*");
2161 else if (PyBytes_Check(tag))
2162 star = PyBytes_FromString("*");
2163
2164 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2165 tag = Py_None;
2166
2167 Py_XDECREF(star);
2168 it->sought_tag = tag;
2169 it->root_done = 0;
2170 it->gettext = gettext;
2171 it->root_element = self;
2172
2173 Py_INCREF(self);
2174 Py_INCREF(tag);
2175
2176 PyObject_GC_Track(it);
2177 return (PyObject *)it;
2178}
2179
2180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002181/* ==================================================================== */
2182/* the tree builder type */
2183
/* State of a TreeBuilder: the tree under construction plus the optional
 * event list filled in while building. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* callable invoked as element_factory(tag, attrib) to create nodes;
       when NULL or None the built-in element constructor is used */
    PyObject *element_factory;

    /* element tracing */
    PyObject *events; /* list of events, or NULL if not collecting */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;
2206
/* Forward declaration; the type object is defined after its methods. */
static PyTypeObject TreeBuilder_Type;

/* True only for direct instances of TreeBuilder_Type (subclasses do not
   match, since the type pointer is compared for identity). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002210
2211/* -------------------------------------------------------------------- */
2212/* constructor and destructor */
2213
Eli Bendersky58d548d2012-05-29 15:45:16 +03002214static PyObject *
2215treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002216{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002217 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2218 if (t != NULL) {
2219 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002220
Eli Bendersky58d548d2012-05-29 15:45:16 +03002221 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002222 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002223 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002224 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002225
Eli Bendersky58d548d2012-05-29 15:45:16 +03002226 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002227 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002228 t->stack = PyList_New(20);
2229 if (!t->stack) {
2230 Py_DECREF(t->this);
2231 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002232 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002233 return NULL;
2234 }
2235 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002236
Eli Bendersky58d548d2012-05-29 15:45:16 +03002237 t->events = NULL;
2238 t->start_event_obj = t->end_event_obj = NULL;
2239 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2240 }
2241 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002242}
2243
Eli Bendersky58d548d2012-05-29 15:45:16 +03002244static int
2245treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002246{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002247 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002248 PyObject *element_factory = NULL;
2249 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002250 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002251
2252 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2253 &element_factory)) {
2254 return -1;
2255 }
2256
2257 if (element_factory) {
2258 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002259 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002260 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002261 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002262 }
2263
Eli Bendersky58d548d2012-05-29 15:45:16 +03002264 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002265}
2266
Eli Bendersky48d358b2012-05-30 17:57:50 +03002267static int
2268treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2269{
2270 Py_VISIT(self->root);
2271 Py_VISIT(self->this);
2272 Py_VISIT(self->last);
2273 Py_VISIT(self->data);
2274 Py_VISIT(self->stack);
2275 Py_VISIT(self->element_factory);
2276 return 0;
2277}
2278
/* tp_clear slot for TreeBuilder: drops every object reference held by the
 * builder and resets the fields to NULL.  Also used by treebuilder_dealloc.
 * Always returns 0. */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    /* event tracing state */
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events);
    /* tree construction state */
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->this);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002295
/* tp_dealloc slot for TreeBuilder: untracks the object from the garbage
 * collector, releases all owned references and frees the memory through
 * the type's tp_free. */
static void
treebuilder_dealloc(TreeBuilderObject *self)
{
    /* untrack before any reference is dropped */
    PyObject_GC_UnTrack(self);
    treebuilder_gc_clear(self);
    Py_TYPE(self)->tp_free((PyObject *)self);
}
2303
2304/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002305/* helpers for handling of arbitrary element-like objects */
2306
2307static int
2308treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2309 PyObject **dest, _Py_Identifier *name)
2310{
2311 if (Element_CheckExact(element)) {
2312 Py_DECREF(JOIN_OBJ(*dest));
2313 *dest = JOIN_SET(data, PyList_CheckExact(data));
2314 return 0;
2315 }
2316 else {
2317 PyObject *joined = list_join(data);
2318 int r;
2319 if (joined == NULL)
2320 return -1;
2321 r = _PyObject_SetAttrId(element, name, joined);
2322 Py_DECREF(joined);
2323 return r;
2324 }
2325}
2326
/* These two functions steal a reference to data */
/* Set the "text" of element to data.  The slot address is computed by
 * casting element to ElementObject even when it is some other object; the
 * helper only dereferences it for exact Elements and uses the "text"
 * attribute otherwise.  Returns 0 on success, -1 on failure. */
static int
treebuilder_set_element_text(PyObject *element, PyObject *data)
{
    _Py_IDENTIFIER(text);
    return treebuilder_set_element_text_or_tail(
        element, data, &((ElementObject *) element)->text, &PyId_text);
}
2335
/* Set the "tail" of element to data.  The slot address is computed by
 * casting element to ElementObject even when it is some other object; the
 * helper only dereferences it for exact Elements and uses the "tail"
 * attribute otherwise.  Returns 0 on success, -1 on failure. */
static int
treebuilder_set_element_tail(PyObject *element, PyObject *data)
{
    _Py_IDENTIFIER(tail);
    return treebuilder_set_element_text_or_tail(
        element, data, &((ElementObject *) element)->tail, &PyId_tail);
}
2343
2344static int
2345treebuilder_add_subelement(PyObject *element, PyObject *child)
2346{
2347 _Py_IDENTIFIER(append);
2348 if (Element_CheckExact(element)) {
2349 ElementObject *elem = (ElementObject *) element;
2350 return element_add_subelement(elem, child);
2351 }
2352 else {
2353 PyObject *res;
2354 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2355 if (res == NULL)
2356 return -1;
2357 Py_DECREF(res);
2358 return 0;
2359 }
2360}
2361
2362/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002363/* handlers */
2364
2365LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002366treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2367 PyObject* attrib)
2368{
2369 PyObject* node;
2370 PyObject* this;
2371
2372 if (self->data) {
2373 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002374 if (treebuilder_set_element_text(self->last, self->data))
2375 return NULL;
2376 }
2377 else {
2378 if (treebuilder_set_element_tail(self->last, self->data))
2379 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002380 }
2381 self->data = NULL;
2382 }
2383
Eli Bendersky08231a92013-05-18 15:47:16 -07002384 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002385 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2386 } else {
2387 node = create_new_element(tag, attrib);
2388 }
2389 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002390 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002391 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002392
Antoine Pitrouee329312012-10-04 19:53:29 +02002393 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002394
2395 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002396 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002397 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002398 } else {
2399 if (self->root) {
2400 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002401 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002402 "multiple elements on top level"
2403 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002404 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002405 }
2406 Py_INCREF(node);
2407 self->root = node;
2408 }
2409
2410 if (self->index < PyList_GET_SIZE(self->stack)) {
2411 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002412 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002413 Py_INCREF(this);
2414 } else {
2415 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002416 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002417 }
2418 self->index++;
2419
2420 Py_DECREF(this);
2421 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002422 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002423
2424 Py_DECREF(self->last);
2425 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002426 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002427
2428 if (self->start_event_obj) {
2429 PyObject* res;
2430 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002431 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002432 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002433 PyList_Append(self->events, res);
2434 Py_DECREF(res);
2435 } else
2436 PyErr_Clear(); /* FIXME: propagate error */
2437 }
2438
2439 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002440
2441 error:
2442 Py_DECREF(node);
2443 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002444}
2445
2446LOCAL(PyObject*)
2447treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2448{
2449 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002450 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002451 /* ignore calls to data before the first call to start */
2452 Py_RETURN_NONE;
2453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002454 /* store the first item as is */
2455 Py_INCREF(data); self->data = data;
2456 } else {
2457 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002458 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2459 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002460 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002461 /* expat often generates single character data sections; handle
2462 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002463 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2464 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002465 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002466 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002467 } else if (PyList_CheckExact(self->data)) {
2468 if (PyList_Append(self->data, data) < 0)
2469 return NULL;
2470 } else {
2471 PyObject* list = PyList_New(2);
2472 if (!list)
2473 return NULL;
2474 PyList_SET_ITEM(list, 0, self->data);
2475 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2476 self->data = list;
2477 }
2478 }
2479
2480 Py_RETURN_NONE;
2481}
2482
2483LOCAL(PyObject*)
2484treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2485{
2486 PyObject* item;
2487
2488 if (self->data) {
2489 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002490 if (treebuilder_set_element_text(self->last, self->data))
2491 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002492 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002493 if (treebuilder_set_element_tail(self->last, self->data))
2494 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002495 }
2496 self->data = NULL;
2497 }
2498
2499 if (self->index == 0) {
2500 PyErr_SetString(
2501 PyExc_IndexError,
2502 "pop from empty stack"
2503 );
2504 return NULL;
2505 }
2506
2507 self->index--;
2508
2509 item = PyList_GET_ITEM(self->stack, self->index);
2510 Py_INCREF(item);
2511
2512 Py_DECREF(self->last);
2513
Antoine Pitrouee329312012-10-04 19:53:29 +02002514 self->last = self->this;
2515 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002516
2517 if (self->end_event_obj) {
2518 PyObject* res;
2519 PyObject* action = self->end_event_obj;
2520 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002521 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002522 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002523 PyList_Append(self->events, res);
2524 Py_DECREF(res);
2525 } else
2526 PyErr_Clear(); /* FIXME: propagate error */
2527 }
2528
2529 Py_INCREF(self->last);
2530 return (PyObject*) self->last;
2531}
2532
2533LOCAL(void)
2534treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002535 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002536{
2537 PyObject* res;
2538 PyObject* action;
2539 PyObject* parcel;
2540
2541 if (!self->events)
2542 return;
2543
2544 if (start) {
2545 if (!self->start_ns_event_obj)
2546 return;
2547 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002548 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002549 if (!parcel)
2550 return;
2551 Py_INCREF(action);
2552 } else {
2553 if (!self->end_ns_event_obj)
2554 return;
2555 action = self->end_ns_event_obj;
2556 Py_INCREF(action);
2557 parcel = Py_None;
2558 Py_INCREF(parcel);
2559 }
2560
2561 res = PyTuple_New(2);
2562
2563 if (res) {
2564 PyTuple_SET_ITEM(res, 0, action);
2565 PyTuple_SET_ITEM(res, 1, parcel);
2566 PyList_Append(self->events, res);
2567 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002568 }
2569 else {
2570 Py_DECREF(action);
2571 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002572 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002573 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002574}
2575
2576/* -------------------------------------------------------------------- */
2577/* methods (in alphabetical order) */
2578
2579static PyObject*
2580treebuilder_data(TreeBuilderObject* self, PyObject* args)
2581{
2582 PyObject* data;
2583 if (!PyArg_ParseTuple(args, "O:data", &data))
2584 return NULL;
2585
2586 return treebuilder_handle_data(self, data);
2587}
2588
2589static PyObject*
2590treebuilder_end(TreeBuilderObject* self, PyObject* args)
2591{
2592 PyObject* tag;
2593 if (!PyArg_ParseTuple(args, "O:end", &tag))
2594 return NULL;
2595
2596 return treebuilder_handle_end(self, tag);
2597}
2598
2599LOCAL(PyObject*)
2600treebuilder_done(TreeBuilderObject* self)
2601{
2602 PyObject* res;
2603
2604 /* FIXME: check stack size? */
2605
2606 if (self->root)
2607 res = self->root;
2608 else
2609 res = Py_None;
2610
2611 Py_INCREF(res);
2612 return res;
2613}
2614
2615static PyObject*
2616treebuilder_close(TreeBuilderObject* self, PyObject* args)
2617{
2618 if (!PyArg_ParseTuple(args, ":close"))
2619 return NULL;
2620
2621 return treebuilder_done(self);
2622}
2623
2624static PyObject*
2625treebuilder_start(TreeBuilderObject* self, PyObject* args)
2626{
2627 PyObject* tag;
2628 PyObject* attrib = Py_None;
2629 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2630 return NULL;
2631
2632 return treebuilder_handle_start(self, tag, attrib);
2633}
2634
/* Python-visible methods of TreeBuilder.  All of them take their arguments
 * as a tuple (METH_VARARGS); the table ends with a NULL sentinel. */
static PyMethodDef treebuilder_methods[] = {
    {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
    {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
    {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
    {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
    {NULL, NULL}    /* sentinel */
};
2642
/* Type object for TreeBuilder.  The type can be subclassed
 * (Py_TPFLAGS_BASETYPE) and takes part in cyclic garbage collection.  The
 * initializer is positional: every slot must stay in place. */
static PyTypeObject TreeBuilder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* tp_name, tp_basicsize, tp_itemsize */
    "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
    /* methods */
    (destructor)treebuilder_dealloc,                /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_reserved */
    0,                                              /* tp_repr */
    0,                                              /* tp_as_number */
    0,                                              /* tp_as_sequence */
    0,                                              /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    0,                                              /* tp_getattro */
    0,                                              /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)treebuilder_gc_traverse,          /* tp_traverse */
    (inquiry)treebuilder_gc_clear,                  /* tp_clear */
    0,                                              /* tp_richcompare */
    0,                                              /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    treebuilder_methods,                            /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    (initproc)treebuilder_init,                     /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    treebuilder_new,                                /* tp_new */
    0,                                              /* tp_free */
};
2684
2685/* ==================================================================== */
2686/* the expat interface */
2687
2688#if defined(USE_EXPAT)
2689
2690#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002691#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002692static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002693#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002694
Eli Bendersky52467b12012-06-01 07:13:08 +03002695static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2696 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2697
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002698typedef struct {
2699 PyObject_HEAD
2700
2701 XML_Parser parser;
2702
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002703 PyObject *target;
2704 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002705
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002706 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002708 PyObject *handle_start;
2709 PyObject *handle_data;
2710 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002712 PyObject *handle_comment;
2713 PyObject *handle_pi;
2714 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002715
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002716 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002717
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718} XMLParserObject;
2719
Neal Norwitz227b5332006-03-22 09:28:35 +00002720static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002721
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002722#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2723
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002724/* helpers */
2725
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002726LOCAL(PyObject*)
2727makeuniversal(XMLParserObject* self, const char* string)
2728{
2729 /* convert a UTF-8 tag/attribute name from the expat parser
2730 to a universal name string */
2731
Antoine Pitrouc1948842012-10-01 23:40:37 +02002732 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002733 PyObject* key;
2734 PyObject* value;
2735
2736 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002737 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738 if (!key)
2739 return NULL;
2740
2741 value = PyDict_GetItem(self->names, key);
2742
2743 if (value) {
2744 Py_INCREF(value);
2745 } else {
2746 /* new name. convert to universal name, and decode as
2747 necessary */
2748
2749 PyObject* tag;
2750 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002751 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002752
2753 /* look for namespace separator */
2754 for (i = 0; i < size; i++)
2755 if (string[i] == '}')
2756 break;
2757 if (i != size) {
2758 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002759 tag = PyBytes_FromStringAndSize(NULL, size+1);
2760 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002761 p[0] = '{';
2762 memcpy(p+1, string, size);
2763 size++;
2764 } else {
2765 /* plain name; use key as tag */
2766 Py_INCREF(key);
2767 tag = key;
2768 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002769
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002770 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002771 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002772 value = PyUnicode_DecodeUTF8(p, size, "strict");
2773 Py_DECREF(tag);
2774 if (!value) {
2775 Py_DECREF(key);
2776 return NULL;
2777 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002778
2779 /* add to names dictionary */
2780 if (PyDict_SetItem(self->names, key, value) < 0) {
2781 Py_DECREF(key);
2782 Py_DECREF(value);
2783 return NULL;
2784 }
2785 }
2786
2787 Py_DECREF(key);
2788 return value;
2789}
2790
Eli Bendersky5b77d812012-03-16 08:20:05 +02002791/* Set the ParseError exception with the given parameters.
2792 * If message is not NULL, it's used as the error string. Otherwise, the
2793 * message string is the default for the given error_code.
2794*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002795static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002796expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002797{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002798 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002799
Victor Stinner499dfcf2011-03-21 13:26:24 +01002800 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002801 message ? message : EXPAT(ErrorString)(error_code),
2802 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002803 if (errmsg == NULL)
2804 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002805
Victor Stinner499dfcf2011-03-21 13:26:24 +01002806 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2807 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002808 if (!error)
2809 return;
2810
Eli Bendersky5b77d812012-03-16 08:20:05 +02002811 /* Add code and position attributes */
2812 code = PyLong_FromLong((long)error_code);
2813 if (!code) {
2814 Py_DECREF(error);
2815 return;
2816 }
2817 if (PyObject_SetAttrString(error, "code", code) == -1) {
2818 Py_DECREF(error);
2819 Py_DECREF(code);
2820 return;
2821 }
2822 Py_DECREF(code);
2823
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002824 position = Py_BuildValue("(ii)", line, column);
2825 if (!position) {
2826 Py_DECREF(error);
2827 return;
2828 }
2829 if (PyObject_SetAttrString(error, "position", position) == -1) {
2830 Py_DECREF(error);
2831 Py_DECREF(position);
2832 return;
2833 }
2834 Py_DECREF(position);
2835
2836 PyErr_SetObject(elementtree_parseerror_obj, error);
2837 Py_DECREF(error);
2838}
2839
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002840/* -------------------------------------------------------------------- */
2841/* handlers */
2842
2843static void
2844expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2845 int data_len)
2846{
2847 PyObject* key;
2848 PyObject* value;
2849 PyObject* res;
2850
2851 if (data_len < 2 || data_in[0] != '&')
2852 return;
2853
Neal Norwitz0269b912007-08-08 06:56:02 +00002854 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002855 if (!key)
2856 return;
2857
2858 value = PyDict_GetItem(self->entity, key);
2859
2860 if (value) {
2861 if (TreeBuilder_CheckExact(self->target))
2862 res = treebuilder_handle_data(
2863 (TreeBuilderObject*) self->target, value
2864 );
2865 else if (self->handle_data)
2866 res = PyObject_CallFunction(self->handle_data, "O", value);
2867 else
2868 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002869 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002870 } else if (!PyErr_Occurred()) {
2871 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002872 char message[128] = "undefined entity ";
2873 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002874 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002875 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002876 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002877 EXPAT(GetErrorColumnNumber)(self->parser),
2878 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002879 );
2880 }
2881
2882 Py_DECREF(key);
2883}
2884
2885static void
2886expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2887 const XML_Char **attrib_in)
2888{
2889 PyObject* res;
2890 PyObject* tag;
2891 PyObject* attrib;
2892 int ok;
2893
2894 /* tag name */
2895 tag = makeuniversal(self, tag_in);
2896 if (!tag)
2897 return; /* parser will look for errors */
2898
2899 /* attributes */
2900 if (attrib_in[0]) {
2901 attrib = PyDict_New();
2902 if (!attrib)
2903 return;
2904 while (attrib_in[0] && attrib_in[1]) {
2905 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002906 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002907 if (!key || !value) {
2908 Py_XDECREF(value);
2909 Py_XDECREF(key);
2910 Py_DECREF(attrib);
2911 return;
2912 }
2913 ok = PyDict_SetItem(attrib, key, value);
2914 Py_DECREF(value);
2915 Py_DECREF(key);
2916 if (ok < 0) {
2917 Py_DECREF(attrib);
2918 return;
2919 }
2920 attrib_in += 2;
2921 }
2922 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002923 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002924 attrib = PyDict_New();
2925 if (!attrib)
2926 return;
2927 }
2928
2929 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002930 /* shortcut */
2931 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2932 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002933 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002934 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002935 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002936 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002937 res = NULL;
2938
2939 Py_DECREF(tag);
2940 Py_DECREF(attrib);
2941
2942 Py_XDECREF(res);
2943}
2944
2945static void
2946expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2947 int data_len)
2948{
2949 PyObject* data;
2950 PyObject* res;
2951
Neal Norwitz0269b912007-08-08 06:56:02 +00002952 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002953 if (!data)
2954 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002955
2956 if (TreeBuilder_CheckExact(self->target))
2957 /* shortcut */
2958 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2959 else if (self->handle_data)
2960 res = PyObject_CallFunction(self->handle_data, "O", data);
2961 else
2962 res = NULL;
2963
2964 Py_DECREF(data);
2965
2966 Py_XDECREF(res);
2967}
2968
2969static void
2970expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2971{
2972 PyObject* tag;
2973 PyObject* res = NULL;
2974
2975 if (TreeBuilder_CheckExact(self->target))
2976 /* shortcut */
2977 /* the standard tree builder doesn't look at the end tag */
2978 res = treebuilder_handle_end(
2979 (TreeBuilderObject*) self->target, Py_None
2980 );
2981 else if (self->handle_end) {
2982 tag = makeuniversal(self, tag_in);
2983 if (tag) {
2984 res = PyObject_CallFunction(self->handle_end, "O", tag);
2985 Py_DECREF(tag);
2986 }
2987 }
2988
2989 Py_XDECREF(res);
2990}
2991
2992static void
2993expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2994 const XML_Char *uri)
2995{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002996 PyObject* sprefix = NULL;
2997 PyObject* suri = NULL;
2998
2999 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
3000 if (!suri)
3001 return;
3002
3003 if (prefix)
3004 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3005 else
3006 sprefix = PyUnicode_FromString("");
3007 if (!sprefix) {
3008 Py_DECREF(suri);
3009 return;
3010 }
3011
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003012 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003013 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003014 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003015
3016 Py_DECREF(sprefix);
3017 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003018}
3019
3020static void
3021expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3022{
3023 treebuilder_handle_namespace(
3024 (TreeBuilderObject*) self->target, 0, NULL, NULL
3025 );
3026}
3027
3028static void
3029expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3030{
3031 PyObject* comment;
3032 PyObject* res;
3033
3034 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003035 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003036 if (comment) {
3037 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3038 Py_XDECREF(res);
3039 Py_DECREF(comment);
3040 }
3041 }
3042}
3043
Eli Bendersky45839902013-01-13 05:14:47 -08003044static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003045expat_start_doctype_handler(XMLParserObject *self,
3046 const XML_Char *doctype_name,
3047 const XML_Char *sysid,
3048 const XML_Char *pubid,
3049 int has_internal_subset)
3050{
3051 PyObject *self_pyobj = (PyObject *)self;
3052 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3053 PyObject *parser_doctype = NULL;
3054 PyObject *res = NULL;
3055
3056 doctype_name_obj = makeuniversal(self, doctype_name);
3057 if (!doctype_name_obj)
3058 return;
3059
3060 if (sysid) {
3061 sysid_obj = makeuniversal(self, sysid);
3062 if (!sysid_obj) {
3063 Py_DECREF(doctype_name_obj);
3064 return;
3065 }
3066 } else {
3067 Py_INCREF(Py_None);
3068 sysid_obj = Py_None;
3069 }
3070
3071 if (pubid) {
3072 pubid_obj = makeuniversal(self, pubid);
3073 if (!pubid_obj) {
3074 Py_DECREF(doctype_name_obj);
3075 Py_DECREF(sysid_obj);
3076 return;
3077 }
3078 } else {
3079 Py_INCREF(Py_None);
3080 pubid_obj = Py_None;
3081 }
3082
3083 /* If the target has a handler for doctype, call it. */
3084 if (self->handle_doctype) {
3085 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3086 doctype_name_obj, pubid_obj, sysid_obj);
3087 Py_CLEAR(res);
3088 }
3089
3090 /* Now see if the parser itself has a doctype method. If yes and it's
3091 * a subclass, call it but warn about deprecation. If it's not a subclass
3092 * (i.e. vanilla XMLParser), do nothing.
3093 */
3094 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3095 if (parser_doctype) {
3096 if (!XMLParser_CheckExact(self_pyobj)) {
3097 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3098 "This method of XMLParser is deprecated. Define"
3099 " doctype() method on the TreeBuilder target.",
3100 1) < 0) {
3101 goto clear;
3102 }
3103 res = PyObject_CallFunction(parser_doctype, "OOO",
3104 doctype_name_obj, pubid_obj, sysid_obj);
3105 Py_CLEAR(res);
3106 }
3107 }
3108
3109clear:
3110 Py_XDECREF(parser_doctype);
3111 Py_DECREF(doctype_name_obj);
3112 Py_DECREF(pubid_obj);
3113 Py_DECREF(sysid_obj);
3114}
3115
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003116static void
3117expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3118 const XML_Char* data_in)
3119{
3120 PyObject* target;
3121 PyObject* data;
3122 PyObject* res;
3123
3124 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003125 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3126 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003127 if (target && data) {
3128 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3129 Py_XDECREF(res);
3130 Py_DECREF(data);
3131 Py_DECREF(target);
3132 } else {
3133 Py_XDECREF(data);
3134 Py_XDECREF(target);
3135 }
3136 }
3137}
3138
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003139static int
3140expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
3141 XML_Encoding *info)
3142{
3143 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003144 unsigned char s[256];
3145 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003146 void *data;
3147 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003148
3149 memset(info, 0, sizeof(XML_Encoding));
3150
3151 for (i = 0; i < 256; i++)
3152 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003153
Fredrik Lundhc3389992005-12-25 11:40:19 +00003154 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003155 if (!u)
3156 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003157 if (PyUnicode_READY(u))
3158 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003159
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003160 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003161 Py_DECREF(u);
3162 return XML_STATUS_ERROR;
3163 }
3164
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003165 kind = PyUnicode_KIND(u);
3166 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003167 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003168 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
3169 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
3170 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003171 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003172 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003173 }
3174
3175 Py_DECREF(u);
3176
3177 return XML_STATUS_OK;
3178}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003179
3180/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003181
Eli Bendersky52467b12012-06-01 07:13:08 +03003182static PyObject *
3183xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003184{
Eli Bendersky52467b12012-06-01 07:13:08 +03003185 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3186 if (self) {
3187 self->parser = NULL;
3188 self->target = self->entity = self->names = NULL;
3189 self->handle_start = self->handle_data = self->handle_end = NULL;
3190 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003191 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003192 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003193 return (PyObject *)self;
3194}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003195
Eli Bendersky52467b12012-06-01 07:13:08 +03003196static int
3197xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3198{
3199 XMLParserObject *self_xp = (XMLParserObject *)self;
3200 PyObject *target = NULL, *html = NULL;
3201 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003202 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003203
Eli Bendersky52467b12012-06-01 07:13:08 +03003204 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3205 &html, &target, &encoding)) {
3206 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003207 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003208
Eli Bendersky52467b12012-06-01 07:13:08 +03003209 self_xp->entity = PyDict_New();
3210 if (!self_xp->entity)
3211 return -1;
3212
3213 self_xp->names = PyDict_New();
3214 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003215 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003216 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003217 }
3218
Eli Bendersky52467b12012-06-01 07:13:08 +03003219 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3220 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003221 Py_CLEAR(self_xp->entity);
3222 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003223 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003224 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003225 }
3226
Eli Bendersky52467b12012-06-01 07:13:08 +03003227 if (target) {
3228 Py_INCREF(target);
3229 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003230 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003231 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003232 Py_CLEAR(self_xp->entity);
3233 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003234 EXPAT(ParserFree)(self_xp->parser);
3235 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003236 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003237 }
3238 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003239
Eli Bendersky52467b12012-06-01 07:13:08 +03003240 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3241 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3242 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3243 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3244 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3245 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003246 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003247
3248 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003249
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003250 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003251 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003252 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003253 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003254 (XML_StartElementHandler) expat_start_handler,
3255 (XML_EndElementHandler) expat_end_handler
3256 );
3257 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003258 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003259 (XML_DefaultHandler) expat_default_handler
3260 );
3261 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003262 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003263 (XML_CharacterDataHandler) expat_data_handler
3264 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003265 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003266 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003267 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003268 (XML_CommentHandler) expat_comment_handler
3269 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003270 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003272 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273 (XML_ProcessingInstructionHandler) expat_pi_handler
3274 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003275 EXPAT(SetStartDoctypeDeclHandler)(
3276 self_xp->parser,
3277 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3278 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003279 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003280 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003281 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
3282 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003283
Eli Bendersky52467b12012-06-01 07:13:08 +03003284 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003285}
3286
Eli Bendersky52467b12012-06-01 07:13:08 +03003287static int
3288xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3289{
3290 Py_VISIT(self->handle_close);
3291 Py_VISIT(self->handle_pi);
3292 Py_VISIT(self->handle_comment);
3293 Py_VISIT(self->handle_end);
3294 Py_VISIT(self->handle_data);
3295 Py_VISIT(self->handle_start);
3296
3297 Py_VISIT(self->target);
3298 Py_VISIT(self->entity);
3299 Py_VISIT(self->names);
3300
3301 return 0;
3302}
3303
3304static int
3305xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003306{
3307 EXPAT(ParserFree)(self->parser);
3308
Antoine Pitrouc1948842012-10-01 23:40:37 +02003309 Py_CLEAR(self->handle_close);
3310 Py_CLEAR(self->handle_pi);
3311 Py_CLEAR(self->handle_comment);
3312 Py_CLEAR(self->handle_end);
3313 Py_CLEAR(self->handle_data);
3314 Py_CLEAR(self->handle_start);
3315 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003316
Antoine Pitrouc1948842012-10-01 23:40:37 +02003317 Py_CLEAR(self->target);
3318 Py_CLEAR(self->entity);
3319 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003320
Eli Bendersky52467b12012-06-01 07:13:08 +03003321 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003322}
3323
Eli Bendersky52467b12012-06-01 07:13:08 +03003324static void
3325xmlparser_dealloc(XMLParserObject* self)
3326{
3327 PyObject_GC_UnTrack(self);
3328 xmlparser_gc_clear(self);
3329 Py_TYPE(self)->tp_free((PyObject *)self);
3330}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003331
3332LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003333expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003334{
3335 int ok;
3336
3337 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3338
3339 if (PyErr_Occurred())
3340 return NULL;
3341
3342 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003343 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003344 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003345 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003346 EXPAT(GetErrorColumnNumber)(self->parser),
3347 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003348 );
3349 return NULL;
3350 }
3351
3352 Py_RETURN_NONE;
3353}
3354
3355static PyObject*
3356xmlparser_close(XMLParserObject* self, PyObject* args)
3357{
3358 /* end feeding data to parser */
3359
3360 PyObject* res;
3361 if (!PyArg_ParseTuple(args, ":close"))
3362 return NULL;
3363
3364 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003365 if (!res)
3366 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003367
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003368 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003369 Py_DECREF(res);
3370 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003371 } if (self->handle_close) {
3372 Py_DECREF(res);
3373 return PyObject_CallFunction(self->handle_close, "");
3374 } else
3375 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003376}
3377
3378static PyObject*
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003379xmlparser_feed(XMLParserObject* self, PyObject* arg)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003380{
3381 /* feed data to parser */
3382
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003383 if (PyUnicode_Check(arg)) {
3384 Py_ssize_t data_len;
3385 const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len);
3386 if (data == NULL)
3387 return NULL;
3388 if (data_len > INT_MAX) {
3389 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3390 return NULL;
3391 }
3392 /* Explicitly set UTF-8 encoding. Return code ignored. */
3393 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3394 return expat_parse(self, data, (int)data_len, 0);
3395 }
3396 else {
3397 Py_buffer view;
3398 PyObject *res;
3399 if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0)
3400 return NULL;
3401 if (view.len > INT_MAX) {
3402 PyBuffer_Release(&view);
3403 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3404 return NULL;
3405 }
3406 res = expat_parse(self, view.buf, (int)view.len, 0);
3407 PyBuffer_Release(&view);
3408 return res;
3409 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003410}
3411
3412static PyObject*
3413xmlparser_parse(XMLParserObject* self, PyObject* args)
3414{
3415 /* (internal) parse until end of input stream */
3416
3417 PyObject* reader;
3418 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003419 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003420 PyObject* res;
3421
3422 PyObject* fileobj;
3423 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3424 return NULL;
3425
3426 reader = PyObject_GetAttrString(fileobj, "read");
3427 if (!reader)
3428 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003429
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003430 /* read from open file object */
3431 for (;;) {
3432
3433 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3434
3435 if (!buffer) {
3436 /* read failed (e.g. due to KeyboardInterrupt) */
3437 Py_DECREF(reader);
3438 return NULL;
3439 }
3440
Eli Benderskyf996e772012-03-16 05:53:30 +02003441 if (PyUnicode_CheckExact(buffer)) {
3442 /* A unicode object is encoded into bytes using UTF-8 */
3443 if (PyUnicode_GET_SIZE(buffer) == 0) {
3444 Py_DECREF(buffer);
3445 break;
3446 }
3447 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003448 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003449 if (!temp) {
3450 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003451 Py_DECREF(reader);
3452 return NULL;
3453 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003454 buffer = temp;
3455 }
3456 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003457 Py_DECREF(buffer);
3458 break;
3459 }
3460
3461 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003462 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003463 );
3464
3465 Py_DECREF(buffer);
3466
3467 if (!res) {
3468 Py_DECREF(reader);
3469 return NULL;
3470 }
3471 Py_DECREF(res);
3472
3473 }
3474
3475 Py_DECREF(reader);
3476
3477 res = expat_parse(self, "", 0, 1);
3478
3479 if (res && TreeBuilder_CheckExact(self->target)) {
3480 Py_DECREF(res);
3481 return treebuilder_done((TreeBuilderObject*) self->target);
3482 }
3483
3484 return res;
3485}
3486
3487static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003488xmlparser_doctype(XMLParserObject *self, PyObject *args)
3489{
3490 Py_RETURN_NONE;
3491}
3492
3493static PyObject*
3494xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003495{
3496 /* activate element event reporting */
3497
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003498 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003499 TreeBuilderObject* target;
3500
3501 PyObject* events; /* event collector */
3502 PyObject* event_set = Py_None;
3503 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
3504 &event_set))
3505 return NULL;
3506
3507 if (!TreeBuilder_CheckExact(self->target)) {
3508 PyErr_SetString(
3509 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003510 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003511 "targets"
3512 );
3513 return NULL;
3514 }
3515
3516 target = (TreeBuilderObject*) self->target;
3517
3518 Py_INCREF(events);
3519 Py_XDECREF(target->events);
3520 target->events = events;
3521
3522 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003523 Py_CLEAR(target->start_event_obj);
3524 Py_CLEAR(target->end_event_obj);
3525 Py_CLEAR(target->start_ns_event_obj);
3526 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003527
3528 if (event_set == Py_None) {
3529 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003530 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003531 Py_RETURN_NONE;
3532 }
3533
3534 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3535 goto error;
3536
3537 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3538 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3539 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003540 if (PyUnicode_Check(item)) {
3541 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003542 if (event == NULL)
3543 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003544 } else if (PyBytes_Check(item))
3545 event = PyBytes_AS_STRING(item);
3546 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003547 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003548 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003549 if (strcmp(event, "start") == 0) {
3550 Py_INCREF(item);
3551 target->start_event_obj = item;
3552 } else if (strcmp(event, "end") == 0) {
3553 Py_INCREF(item);
3554 Py_XDECREF(target->end_event_obj);
3555 target->end_event_obj = item;
3556 } else if (strcmp(event, "start-ns") == 0) {
3557 Py_INCREF(item);
3558 Py_XDECREF(target->start_ns_event_obj);
3559 target->start_ns_event_obj = item;
3560 EXPAT(SetNamespaceDeclHandler)(
3561 self->parser,
3562 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3563 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3564 );
3565 } else if (strcmp(event, "end-ns") == 0) {
3566 Py_INCREF(item);
3567 Py_XDECREF(target->end_ns_event_obj);
3568 target->end_ns_event_obj = item;
3569 EXPAT(SetNamespaceDeclHandler)(
3570 self->parser,
3571 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3572 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3573 );
3574 } else {
3575 PyErr_Format(
3576 PyExc_ValueError,
3577 "unknown event '%s'", event
3578 );
3579 return NULL;
3580 }
3581 }
3582
3583 Py_RETURN_NONE;
3584
3585 error:
3586 PyErr_SetString(
3587 PyExc_TypeError,
3588 "invalid event tuple"
3589 );
3590 return NULL;
3591}
3592
3593static PyMethodDef xmlparser_methods[] = {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003594 {"feed", (PyCFunction) xmlparser_feed, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003595 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3596 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3597 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003598 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003599 {NULL, NULL}
3600};
3601
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003602static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003603xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003604{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003605 if (PyUnicode_Check(nameobj)) {
3606 PyObject* res;
3607 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3608 res = self->entity;
3609 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3610 res = self->target;
3611 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3612 return PyUnicode_FromFormat(
3613 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003614 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003615 }
3616 else
3617 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003618
Alexander Belopolskye239d232010-12-08 23:31:48 +00003619 Py_INCREF(res);
3620 return res;
3621 }
3622 generic:
3623 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003624}
3625
Neal Norwitz227b5332006-03-22 09:28:35 +00003626static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003627 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003628 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003629 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003630 (destructor)xmlparser_dealloc, /* tp_dealloc */
3631 0, /* tp_print */
3632 0, /* tp_getattr */
3633 0, /* tp_setattr */
3634 0, /* tp_reserved */
3635 0, /* tp_repr */
3636 0, /* tp_as_number */
3637 0, /* tp_as_sequence */
3638 0, /* tp_as_mapping */
3639 0, /* tp_hash */
3640 0, /* tp_call */
3641 0, /* tp_str */
3642 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3643 0, /* tp_setattro */
3644 0, /* tp_as_buffer */
3645 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3646 /* tp_flags */
3647 0, /* tp_doc */
3648 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3649 (inquiry)xmlparser_gc_clear, /* tp_clear */
3650 0, /* tp_richcompare */
3651 0, /* tp_weaklistoffset */
3652 0, /* tp_iter */
3653 0, /* tp_iternext */
3654 xmlparser_methods, /* tp_methods */
3655 0, /* tp_members */
3656 0, /* tp_getset */
3657 0, /* tp_base */
3658 0, /* tp_dict */
3659 0, /* tp_descr_get */
3660 0, /* tp_descr_set */
3661 0, /* tp_dictoffset */
3662 (initproc)xmlparser_init, /* tp_init */
3663 PyType_GenericAlloc, /* tp_alloc */
3664 xmlparser_new, /* tp_new */
3665 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003666};
3667
3668#endif
3669
3670/* ==================================================================== */
3671/* python module interface */
3672
3673static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003674 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003675 {NULL, NULL}
3676};
3677
Martin v. Löwis1a214512008-06-11 05:26:20 +00003678
3679static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003680 PyModuleDef_HEAD_INIT,
3681 "_elementtree",
3682 NULL,
3683 -1,
3684 _functions,
3685 NULL,
3686 NULL,
3687 NULL,
3688 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003689};
3690
Neal Norwitzf6657e62006-12-28 04:47:50 +00003691PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003692PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003693{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003694 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003695
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003696 /* Initialize object types */
3697 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003698 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003699 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003700 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003701#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003702 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003703 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003704#endif
3705
Martin v. Löwis1a214512008-06-11 05:26:20 +00003706 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003707 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003708 return NULL;
3709
Eli Bendersky828efde2012-04-05 05:40:58 +03003710 if (!(temp = PyImport_ImportModule("copy")))
3711 return NULL;
3712 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3713 Py_XDECREF(temp);
3714
3715 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3716 return NULL;
3717
Eli Bendersky20d41742012-06-01 09:48:37 +03003718 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003719 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3720 if (expat_capi) {
3721 /* check that it's usable */
3722 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3723 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3724 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3725 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003726 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003727 PyErr_SetString(PyExc_ImportError,
3728 "pyexpat version is incompatible");
3729 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003730 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003731 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003732 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003733 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003734
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003735 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003736 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003737 );
3738 Py_INCREF(elementtree_parseerror_obj);
3739 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3740
Eli Bendersky092af1f2012-03-04 07:14:03 +02003741 Py_INCREF((PyObject *)&Element_Type);
3742 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3743
Eli Bendersky58d548d2012-05-29 15:45:16 +03003744 Py_INCREF((PyObject *)&TreeBuilder_Type);
3745 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3746
Eli Bendersky52467b12012-06-01 07:13:08 +03003747#if defined(USE_EXPAT)
3748 Py_INCREF((PyObject *)&XMLParser_Type);
3749 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3750#endif
3751
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003752 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003753}