/*
 * ElementTree
 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
 *
 * elementtree accelerator
 *
 * History:
 * 1999-06-20 fl  created (as part of sgmlop)
 * 2001-05-29 fl  effdom edition
 * 2003-02-27 fl  elementtree edition (alpha)
 * 2004-06-03 fl  updates for elementtree 1.2
 * 2005-01-05 fl  major optimization effort
 * 2005-01-11 fl  first public release (cElementTree 0.8)
 * 2005-01-12 fl  split element object into base and extras
 * 2005-01-13 fl  use tagged pointers for tail/text (cElementTree 0.9)
 * 2005-01-17 fl  added treebuilder close method
 * 2005-01-17 fl  fixed crash in getchildren
 * 2005-01-18 fl  removed observer api, added iterparse (cElementTree 0.9.3)
 * 2005-01-23 fl  revised iterparse api; added namespace event support (0.9.8)
 * 2005-01-26 fl  added VERSION module property (cElementTree 1.0)
 * 2005-01-28 fl  added remove method (1.0.1)
 * 2005-03-01 fl  added iselement function; fixed makeelement aliasing (1.0.2)
 * 2005-03-13 fl  export Comment and ProcessingInstruction/PI helpers
 * 2005-03-26 fl  added Comment and PI support to XMLParser
 * 2005-03-27 fl  event optimizations; complain about bogus events
 * 2005-08-08 fl  fixed read error handling in parse
 * 2005-08-11 fl  added runtime test for copy workaround (1.0.3)
 * 2005-12-13 fl  added expat_capi support (for xml.etree) (1.0.4)
 * 2005-12-16 fl  added support for non-standard encodings
 * 2006-03-08 fl  fixed a couple of potential null-refs and leaks
 * 2006-03-12 fl  merge in 2.5 ssize_t changes
 * 2007-08-25 fl  call custom builder's close method from XMLParser
 * 2007-08-31 fl  added iter, extend from ET 1.3
 * 2007-09-01 fl  fixed ParseError exception, setslice source type, etc
 * 2007-09-03 fl  fixed handling of negative insert indexes
 * 2007-09-04 fl  added itertext from ET 1.3
 * 2007-09-06 fl  added position attribute to ParseError exception
 * 2008-06-06 fl  delay error reporting in iterparse (from Hrvoje Niksic)
 *
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000053#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054
55/* -------------------------------------------------------------------- */
56/* configuration */
57
58/* Leave defined to include the expat-based XMLParser type */
59#define USE_EXPAT
60
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* An element can hold this many children without extra memory
62 allocations. */
63#define STATIC_CHILDREN 4
64
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
74
75/* -------------------------------------------------------------------- */
76
/* Allocation tracing hooks.  The tracing variant is compiled out; flip the
   condition below to print a running byte total on every ALLOC/RELEASE. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
/* tracing disabled: both hooks expand to nothing */
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) declares a file-private helper returning
   'type'; under MSVC it additionally requests inlining and __fastcall */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
94
/* The 'text' and 'tail' slots of an element are tagged pointers: the low
   bit of the stored address carries a 'join' flag.  Never use those slots
   as object pointers directly; always go through JOIN_OBJ.  See the
   comments in the ElementObject definition for what the flag means.

   JOIN_GET(p)        -> the flag bit (0 or 1)
   JOIN_SET(p, flag)  -> the object pointer of p with 'flag' or-ed in
   JOIN_OBJ(p)        -> the object pointer with the flag bit cleared */
#define JOIN_GET(p) ((Py_uintptr_t)(p) & 1)
#define JOIN_SET(p, flag) ((void*)((Py_uintptr_t)(JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*)((Py_uintptr_t)(p) & ~(Py_uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000102
103/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000104static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000105static PyObject* elementtree_deepcopy_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000106static PyObject* elementpath_obj;
107
108/* helpers */
109
110LOCAL(PyObject*)
111deepcopy(PyObject* object, PyObject* memo)
112{
113 /* do a deep copy of the given object */
114
115 PyObject* args;
116 PyObject* result;
117
118 if (!elementtree_deepcopy_obj) {
119 PyErr_SetString(
120 PyExc_RuntimeError,
121 "deepcopy helper not found"
122 );
123 return NULL;
124 }
125
Antoine Pitrouc1948842012-10-01 23:40:37 +0200126 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000127 if (!args)
128 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000129 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000131 return result;
132}
133
134LOCAL(PyObject*)
135list_join(PyObject* list)
136{
137 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 PyObject* result;
140
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 if (!joiner)
143 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200144 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200146 if (result)
147 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 return result;
149}
150
Eli Bendersky48d358b2012-05-30 17:57:50 +0300151/* Is the given object an empty dictionary?
152*/
153static int
154is_empty_dict(PyObject *obj)
155{
156 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
157}
158
159
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200161/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162
163typedef struct {
164
165 /* attributes (a dictionary object), or None if no attributes */
166 PyObject* attrib;
167
168 /* child elements */
169 int length; /* actual number of items */
170 int allocated; /* allocated items */
171
172 /* this either points to _children or to a malloced buffer */
173 PyObject* *children;
174
175 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100176
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000177} ElementObjectExtra;
178
179typedef struct {
180 PyObject_HEAD
181
182 /* element tag (a string). */
183 PyObject* tag;
184
185 /* text before first child. note that this is a tagged pointer;
186 use JOIN_OBJ to get the object pointer. the join flag is used
187 to distinguish lists created by the tree builder from lists
188 assigned to the attribute by application code; the former
189 should be joined before being returned to the user, the latter
190 should be left intact. */
191 PyObject* text;
192
193 /* text after this element, in parent. note that this is a tagged
194 pointer; use JOIN_OBJ to get the object pointer. */
195 PyObject* tail;
196
197 ElementObjectExtra* extra;
198
Eli Benderskyebf37a22012-04-03 22:02:37 +0300199 PyObject *weakreflist; /* For tp_weaklistoffset */
200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000201} ElementObject;
202
Neal Norwitz227b5332006-03-22 09:28:35 +0000203static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000204
Christian Heimes90aa7642007-12-19 02:45:37 +0000205#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000206
207/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200208/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212{
213 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
214 if (!self->extra)
215 return -1;
216
217 if (!attrib)
218 attrib = Py_None;
219
220 Py_INCREF(attrib);
221 self->extra->attrib = attrib;
222
223 self->extra->length = 0;
224 self->extra->allocated = STATIC_CHILDREN;
225 self->extra->children = self->extra->_children;
226
227 return 0;
228}
229
230LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
Eli Bendersky08b85292012-04-04 15:55:07 +0300233 ElementObjectExtra *myextra;
234 int i;
235
Eli Benderskyebf37a22012-04-03 22:02:37 +0300236 if (!self->extra)
237 return;
238
239 /* Avoid DECREFs calling into this code again (cycles, etc.)
240 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300241 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300242 self->extra = NULL;
243
244 Py_DECREF(myextra->attrib);
245
Eli Benderskyebf37a22012-04-03 22:02:37 +0300246 for (i = 0; i < myextra->length; i++)
247 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000248
Eli Benderskyebf37a22012-04-03 22:02:37 +0300249 if (myextra->children != myextra->_children)
250 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251
Eli Benderskyebf37a22012-04-03 22:02:37 +0300252 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000253}
254
Eli Bendersky092af1f2012-03-04 07:14:03 +0200255/* Convenience internal function to create new Element objects with the given
256 * tag and attributes.
257*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200259create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000260{
261 ElementObject* self;
262
Eli Bendersky0192ba32012-03-30 16:38:33 +0300263 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 if (self == NULL)
265 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 self->extra = NULL;
267
Eli Bendersky48d358b2012-05-30 17:57:50 +0300268 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200269 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000270 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000272 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273 }
274
275 Py_INCREF(tag);
276 self->tag = tag;
277
278 Py_INCREF(Py_None);
279 self->text = Py_None;
280
281 Py_INCREF(Py_None);
282 self->tail = Py_None;
283
Eli Benderskyebf37a22012-04-03 22:02:37 +0300284 self->weakreflist = NULL;
285
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300287 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 return (PyObject*) self;
289}
290
Eli Bendersky092af1f2012-03-04 07:14:03 +0200291static PyObject *
292element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
293{
294 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
295 if (e != NULL) {
296 Py_INCREF(Py_None);
297 e->tag = Py_None;
298
299 Py_INCREF(Py_None);
300 e->text = Py_None;
301
302 Py_INCREF(Py_None);
303 e->tail = Py_None;
304
305 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300306 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307 }
308 return (PyObject *)e;
309}
310
Eli Bendersky737b1732012-05-29 06:02:56 +0300311/* Helper function for extracting the attrib dictionary from a keywords dict.
312 * This is required by some constructors/functions in this module that can
313 * either accept attrib as a keyword argument or all attributes splashed
314 * directly into *kwds.
315 * If there is no 'attrib' keyword, return an empty dict.
316 */
317static PyObject*
318get_attrib_from_keywords(PyObject *kwds)
319{
320 PyObject *attrib_str = PyUnicode_FromString("attrib");
321 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
322
323 if (attrib) {
324 /* If attrib was found in kwds, copy its value and remove it from
325 * kwds
326 */
327 if (!PyDict_Check(attrib)) {
328 Py_DECREF(attrib_str);
329 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
330 Py_TYPE(attrib)->tp_name);
331 return NULL;
332 }
333 attrib = PyDict_Copy(attrib);
334 PyDict_DelItem(kwds, attrib_str);
335 } else {
336 attrib = PyDict_New();
337 }
338
339 Py_DECREF(attrib_str);
340
341 if (attrib)
342 PyDict_Update(attrib, kwds);
343 return attrib;
344}
345
Eli Bendersky092af1f2012-03-04 07:14:03 +0200346static int
347element_init(PyObject *self, PyObject *args, PyObject *kwds)
348{
349 PyObject *tag;
350 PyObject *tmp;
351 PyObject *attrib = NULL;
352 ElementObject *self_elem;
353
354 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
355 return -1;
356
Eli Bendersky737b1732012-05-29 06:02:56 +0300357 if (attrib) {
358 /* attrib passed as positional arg */
359 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200360 if (!attrib)
361 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300362 if (kwds) {
363 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200364 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300365 return -1;
366 }
367 }
368 } else if (kwds) {
369 /* have keywords args */
370 attrib = get_attrib_from_keywords(kwds);
371 if (!attrib)
372 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200373 }
374
375 self_elem = (ElementObject *)self;
376
Antoine Pitrouc1948842012-10-01 23:40:37 +0200377 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200378 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200379 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200380 return -1;
381 }
382 }
383
Eli Bendersky48d358b2012-05-30 17:57:50 +0300384 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200385 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200386
387 /* Replace the objects already pointed to by tag, text and tail. */
388 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 Py_DECREF(tmp);
392
393 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200394 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200395 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200396 Py_DECREF(JOIN_OBJ(tmp));
397
398 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200400 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401 Py_DECREF(JOIN_OBJ(tmp));
402
403 return 0;
404}
405
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000406LOCAL(int)
407element_resize(ElementObject* self, int extra)
408{
409 int size;
410 PyObject* *children;
411
412 /* make sure self->children can hold the given number of extra
413 elements. set an exception and return -1 if allocation failed */
414
415 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417
418 size = self->extra->length + extra;
419
420 if (size > self->extra->allocated) {
421 /* use Python 2.4's list growth strategy */
422 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000423 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100424 * which needs at least 4 bytes.
425 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000426 * be safe.
427 */
428 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000429 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000430 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100431 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000432 * false alarm always assume at least one child to be safe.
433 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000434 children = PyObject_Realloc(self->extra->children,
435 size * sizeof(PyObject*));
436 if (!children)
437 goto nomemory;
438 } else {
439 children = PyObject_Malloc(size * sizeof(PyObject*));
440 if (!children)
441 goto nomemory;
442 /* copy existing children from static area to malloc buffer */
443 memcpy(children, self->extra->children,
444 self->extra->length * sizeof(PyObject*));
445 }
446 self->extra->children = children;
447 self->extra->allocated = size;
448 }
449
450 return 0;
451
452 nomemory:
453 PyErr_NoMemory();
454 return -1;
455}
456
457LOCAL(int)
458element_add_subelement(ElementObject* self, PyObject* element)
459{
460 /* add a child element to a parent */
461
462 if (element_resize(self, 1) < 0)
463 return -1;
464
465 Py_INCREF(element);
466 self->extra->children[self->extra->length] = element;
467
468 self->extra->length++;
469
470 return 0;
471}
472
473LOCAL(PyObject*)
474element_get_attrib(ElementObject* self)
475{
476 /* return borrowed reference to attrib dictionary */
477 /* note: this function assumes that the extra section exists */
478
479 PyObject* res = self->extra->attrib;
480
481 if (res == Py_None) {
482 /* create missing dictionary */
483 res = PyDict_New();
484 if (!res)
485 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200486 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000487 self->extra->attrib = res;
488 }
489
490 return res;
491}
492
493LOCAL(PyObject*)
494element_get_text(ElementObject* self)
495{
496 /* return borrowed reference to text attribute */
497
498 PyObject* res = self->text;
499
500 if (JOIN_GET(res)) {
501 res = JOIN_OBJ(res);
502 if (PyList_CheckExact(res)) {
503 res = list_join(res);
504 if (!res)
505 return NULL;
506 self->text = res;
507 }
508 }
509
510 return res;
511}
512
513LOCAL(PyObject*)
514element_get_tail(ElementObject* self)
515{
516 /* return borrowed reference to text attribute */
517
518 PyObject* res = self->tail;
519
520 if (JOIN_GET(res)) {
521 res = JOIN_OBJ(res);
522 if (PyList_CheckExact(res)) {
523 res = list_join(res);
524 if (!res)
525 return NULL;
526 self->tail = res;
527 }
528 }
529
530 return res;
531}
532
533static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300534subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000535{
536 PyObject* elem;
537
538 ElementObject* parent;
539 PyObject* tag;
540 PyObject* attrib = NULL;
541 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
542 &Element_Type, &parent, &tag,
543 &PyDict_Type, &attrib))
544 return NULL;
545
Eli Bendersky737b1732012-05-29 06:02:56 +0300546 if (attrib) {
547 /* attrib passed as positional arg */
548 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000549 if (!attrib)
550 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300551 if (kwds) {
552 if (PyDict_Update(attrib, kwds) < 0) {
553 return NULL;
554 }
555 }
556 } else if (kwds) {
557 /* have keyword args */
558 attrib = get_attrib_from_keywords(kwds);
559 if (!attrib)
560 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300562 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 Py_INCREF(Py_None);
564 attrib = Py_None;
565 }
566
Eli Bendersky092af1f2012-03-04 07:14:03 +0200567 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000568
569 Py_DECREF(attrib);
570
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000571 if (element_add_subelement(parent, elem) < 0) {
572 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000574 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000575
576 return elem;
577}
578
Eli Bendersky0192ba32012-03-30 16:38:33 +0300579static int
580element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
581{
582 Py_VISIT(self->tag);
583 Py_VISIT(JOIN_OBJ(self->text));
584 Py_VISIT(JOIN_OBJ(self->tail));
585
586 if (self->extra) {
587 int i;
588 Py_VISIT(self->extra->attrib);
589
590 for (i = 0; i < self->extra->length; ++i)
591 Py_VISIT(self->extra->children[i]);
592 }
593 return 0;
594}
595
596static int
597element_gc_clear(ElementObject *self)
598{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300599 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300600
601 /* The following is like Py_CLEAR for self->text and self->tail, but
602 * written explicitily because the real pointers hide behind access
603 * macros.
604 */
605 if (self->text) {
606 PyObject *tmp = JOIN_OBJ(self->text);
607 self->text = NULL;
608 Py_DECREF(tmp);
609 }
610
611 if (self->tail) {
612 PyObject *tmp = JOIN_OBJ(self->tail);
613 self->tail = NULL;
614 Py_DECREF(tmp);
615 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616
617 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300618 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300619 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300620 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300621 return 0;
622}
623
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624static void
625element_dealloc(ElementObject* self)
626{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300627 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300628
629 if (self->weakreflist != NULL)
630 PyObject_ClearWeakRefs((PyObject *) self);
631
Eli Bendersky0192ba32012-03-30 16:38:33 +0300632 /* element_gc_clear clears all references and deallocates extra
633 */
634 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000635
636 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200637 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000638}
639
640/* -------------------------------------------------------------------- */
641/* methods (in alphabetical order) */
642
643static PyObject*
644element_append(ElementObject* self, PyObject* args)
645{
646 PyObject* element;
647 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
648 return NULL;
649
650 if (element_add_subelement(self, element) < 0)
651 return NULL;
652
653 Py_RETURN_NONE;
654}
655
656static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300657element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000658{
659 if (!PyArg_ParseTuple(args, ":clear"))
660 return NULL;
661
Eli Benderskyebf37a22012-04-03 22:02:37 +0300662 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663
664 Py_INCREF(Py_None);
665 Py_DECREF(JOIN_OBJ(self->text));
666 self->text = Py_None;
667
668 Py_INCREF(Py_None);
669 Py_DECREF(JOIN_OBJ(self->tail));
670 self->tail = Py_None;
671
672 Py_RETURN_NONE;
673}
674
675static PyObject*
676element_copy(ElementObject* self, PyObject* args)
677{
678 int i;
679 ElementObject* element;
680
681 if (!PyArg_ParseTuple(args, ":__copy__"))
682 return NULL;
683
Eli Bendersky092af1f2012-03-04 07:14:03 +0200684 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000685 self->tag, (self->extra) ? self->extra->attrib : Py_None
686 );
687 if (!element)
688 return NULL;
689
690 Py_DECREF(JOIN_OBJ(element->text));
691 element->text = self->text;
692 Py_INCREF(JOIN_OBJ(element->text));
693
694 Py_DECREF(JOIN_OBJ(element->tail));
695 element->tail = self->tail;
696 Py_INCREF(JOIN_OBJ(element->tail));
697
698 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100699
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000700 if (element_resize(element, self->extra->length) < 0) {
701 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000702 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000703 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704
705 for (i = 0; i < self->extra->length; i++) {
706 Py_INCREF(self->extra->children[i]);
707 element->extra->children[i] = self->extra->children[i];
708 }
709
710 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100711
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000712 }
713
714 return (PyObject*) element;
715}
716
717static PyObject*
718element_deepcopy(ElementObject* self, PyObject* args)
719{
720 int i;
721 ElementObject* element;
722 PyObject* tag;
723 PyObject* attrib;
724 PyObject* text;
725 PyObject* tail;
726 PyObject* id;
727
728 PyObject* memo;
729 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
730 return NULL;
731
732 tag = deepcopy(self->tag, memo);
733 if (!tag)
734 return NULL;
735
736 if (self->extra) {
737 attrib = deepcopy(self->extra->attrib, memo);
738 if (!attrib) {
739 Py_DECREF(tag);
740 return NULL;
741 }
742 } else {
743 Py_INCREF(Py_None);
744 attrib = Py_None;
745 }
746
Eli Bendersky092af1f2012-03-04 07:14:03 +0200747 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
749 Py_DECREF(tag);
750 Py_DECREF(attrib);
751
752 if (!element)
753 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100754
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755 text = deepcopy(JOIN_OBJ(self->text), memo);
756 if (!text)
757 goto error;
758 Py_DECREF(element->text);
759 element->text = JOIN_SET(text, JOIN_GET(self->text));
760
761 tail = deepcopy(JOIN_OBJ(self->tail), memo);
762 if (!tail)
763 goto error;
764 Py_DECREF(element->tail);
765 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
766
767 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100768
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000769 if (element_resize(element, self->extra->length) < 0)
770 goto error;
771
772 for (i = 0; i < self->extra->length; i++) {
773 PyObject* child = deepcopy(self->extra->children[i], memo);
774 if (!child) {
775 element->extra->length = i;
776 goto error;
777 }
778 element->extra->children[i] = child;
779 }
780
781 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100782
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000783 }
784
785 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200786 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000787 if (!id)
788 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789
790 i = PyDict_SetItem(memo, id, (PyObject*) element);
791
792 Py_DECREF(id);
793
794 if (i < 0)
795 goto error;
796
797 return (PyObject*) element;
798
799 error:
800 Py_DECREF(element);
801 return NULL;
802}
803
Martin v. Löwisbce16662012-06-17 10:41:22 +0200804static PyObject*
805element_sizeof(PyObject* _self, PyObject* args)
806{
807 ElementObject *self = (ElementObject*)_self;
808 Py_ssize_t result = sizeof(ElementObject);
809 if (self->extra) {
810 result += sizeof(ElementObjectExtra);
811 if (self->extra->children != self->extra->_children)
812 result += sizeof(PyObject*) * self->extra->allocated;
813 }
814 return PyLong_FromSsize_t(result);
815}
816
Eli Bendersky698bdb22013-01-10 06:01:06 -0800817/* dict keys for getstate/setstate. */
818#define PICKLED_TAG "tag"
819#define PICKLED_CHILDREN "_children"
820#define PICKLED_ATTRIB "attrib"
821#define PICKLED_TAIL "tail"
822#define PICKLED_TEXT "text"
823
824/* __getstate__ returns a fabricated instance dict as in the pure-Python
825 * Element implementation, for interoperability/interchangeability. This
826 * makes the pure-Python implementation details an API, but (a) there aren't
827 * any unnecessary structures there; and (b) it buys compatibility with 3.2
828 * pickles. See issue #16076.
829 */
830static PyObject *
831element_getstate(ElementObject *self)
832{
833 int i, noattrib;
834 PyObject *instancedict = NULL, *children;
835
836 /* Build a list of children. */
837 children = PyList_New(self->extra ? self->extra->length : 0);
838 if (!children)
839 return NULL;
840 for (i = 0; i < PyList_GET_SIZE(children); i++) {
841 PyObject *child = self->extra->children[i];
842 Py_INCREF(child);
843 PyList_SET_ITEM(children, i, child);
844 }
845
846 /* Construct the state object. */
847 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
848 if (noattrib)
849 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
850 PICKLED_TAG, self->tag,
851 PICKLED_CHILDREN, children,
852 PICKLED_ATTRIB,
853 PICKLED_TEXT, self->text,
854 PICKLED_TAIL, self->tail);
855 else
856 instancedict = Py_BuildValue("{sOsOsOsOsO}",
857 PICKLED_TAG, self->tag,
858 PICKLED_CHILDREN, children,
859 PICKLED_ATTRIB, self->extra->attrib,
860 PICKLED_TEXT, self->text,
861 PICKLED_TAIL, self->tail);
862 if (instancedict)
863 return instancedict;
864 else {
865 for (i = 0; i < PyList_GET_SIZE(children); i++)
866 Py_DECREF(PyList_GET_ITEM(children, i));
867 Py_DECREF(children);
868
869 return NULL;
870 }
871}
872
873static PyObject *
874element_setstate_from_attributes(ElementObject *self,
875 PyObject *tag,
876 PyObject *attrib,
877 PyObject *text,
878 PyObject *tail,
879 PyObject *children)
880{
881 Py_ssize_t i, nchildren;
882
883 if (!tag) {
884 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
885 return NULL;
886 }
887 if (!text) {
888 Py_INCREF(Py_None);
889 text = Py_None;
890 }
891 if (!tail) {
892 Py_INCREF(Py_None);
893 tail = Py_None;
894 }
895
896 Py_CLEAR(self->tag);
897 self->tag = tag;
898 Py_INCREF(self->tag);
899
900 Py_CLEAR(self->text);
901 self->text = text;
902 Py_INCREF(self->text);
903
904 Py_CLEAR(self->tail);
905 self->tail = tail;
906 Py_INCREF(self->tail);
907
908 /* Handle ATTRIB and CHILDREN. */
909 if (!children && !attrib)
910 Py_RETURN_NONE;
911
912 /* Compute 'nchildren'. */
913 if (children) {
914 if (!PyList_Check(children)) {
915 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
916 return NULL;
917 }
918 nchildren = PyList_Size(children);
919 }
920 else {
921 nchildren = 0;
922 }
923
924 /* Allocate 'extra'. */
925 if (element_resize(self, nchildren)) {
926 return NULL;
927 }
928 assert(self->extra && self->extra->allocated >= nchildren);
929
930 /* Copy children */
931 for (i = 0; i < nchildren; i++) {
932 self->extra->children[i] = PyList_GET_ITEM(children, i);
933 Py_INCREF(self->extra->children[i]);
934 }
935
936 self->extra->length = nchildren;
937 self->extra->allocated = nchildren;
938
939 /* Stash attrib. */
940 if (attrib) {
941 Py_CLEAR(self->extra->attrib);
942 self->extra->attrib = attrib;
943 Py_INCREF(attrib);
944 }
945
946 Py_RETURN_NONE;
947}
948
949/* __setstate__ for Element instance from the Python implementation.
950 * 'state' should be the instance dict.
951 */
952static PyObject *
953element_setstate_from_Python(ElementObject *self, PyObject *state)
954{
955 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
956 PICKLED_TAIL, PICKLED_CHILDREN, 0};
957 PyObject *args;
958 PyObject *tag, *attrib, *text, *tail, *children;
959 int error;
960
961 /* More instance dict members than we know to handle? */
962 tag = attrib = text = tail = children = NULL;
963 args = PyTuple_New(0);
964 error = ! PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
965 &attrib, &text, &tail, &children);
966 Py_DECREF(args);
967 if (error)
968 return NULL;
969 else
970 return element_setstate_from_attributes(self, tag, attrib, text,
971 tail, children);
972}
973
974static PyObject *
975element_setstate(ElementObject *self, PyObject *state)
976{
977 if (!PyDict_CheckExact(state)) {
978 PyErr_Format(PyExc_TypeError,
979 "Don't know how to unpickle \"%.200R\" as an Element",
980 state);
981 return NULL;
982 }
983 else
984 return element_setstate_from_Python(self, state);
985}
986
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000987LOCAL(int)
988checkpath(PyObject* tag)
989{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000990 Py_ssize_t i;
991 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000992
993 /* check if a tag contains an xpath character */
994
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000995#define PATHCHAR(ch) \
996 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000997
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000998 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200999 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1000 void *data = PyUnicode_DATA(tag);
1001 unsigned int kind = PyUnicode_KIND(tag);
1002 for (i = 0; i < len; i++) {
1003 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1004 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001005 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001006 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001007 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001008 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001009 return 1;
1010 }
1011 return 0;
1012 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001013 if (PyBytes_Check(tag)) {
1014 char *p = PyBytes_AS_STRING(tag);
1015 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001016 if (p[i] == '{')
1017 check = 0;
1018 else if (p[i] == '}')
1019 check = 1;
1020 else if (check && PATHCHAR(p[i]))
1021 return 1;
1022 }
1023 return 0;
1024 }
1025
1026 return 1; /* unknown type; might be path expression */
1027}
1028
1029static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001030element_extend(ElementObject* self, PyObject* args)
1031{
1032 PyObject* seq;
1033 Py_ssize_t i, seqlen = 0;
1034
1035 PyObject* seq_in;
1036 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1037 return NULL;
1038
1039 seq = PySequence_Fast(seq_in, "");
1040 if (!seq) {
1041 PyErr_Format(
1042 PyExc_TypeError,
1043 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1044 );
1045 return NULL;
1046 }
1047
1048 seqlen = PySequence_Size(seq);
1049 for (i = 0; i < seqlen; i++) {
1050 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001051 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1052 Py_DECREF(seq);
1053 PyErr_Format(
1054 PyExc_TypeError,
1055 "expected an Element, not \"%.200s\"",
1056 Py_TYPE(element)->tp_name);
1057 return NULL;
1058 }
1059
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001060 if (element_add_subelement(self, element) < 0) {
1061 Py_DECREF(seq);
1062 return NULL;
1063 }
1064 }
1065
1066 Py_DECREF(seq);
1067
1068 Py_RETURN_NONE;
1069}
1070
1071static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001072element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001073{
1074 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001075 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001076 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001077 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001078
Eli Bendersky737b1732012-05-29 06:02:56 +03001079 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1080 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081 return NULL;
1082
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001083 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001084 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001085 return _PyObject_CallMethodId(
1086 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001087 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001088 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001089
1090 if (!self->extra)
1091 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001092
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001093 for (i = 0; i < self->extra->length; i++) {
1094 PyObject* item = self->extra->children[i];
1095 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001096 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001097 Py_INCREF(item);
1098 return item;
1099 }
1100 }
1101
1102 Py_RETURN_NONE;
1103}
1104
1105static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001106element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001107{
1108 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001109 PyObject* tag;
1110 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001111 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001112 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001113 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001114
Eli Bendersky737b1732012-05-29 06:02:56 +03001115 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1116 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001117 return NULL;
1118
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001119 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001120 return _PyObject_CallMethodId(
1121 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001122 );
1123
1124 if (!self->extra) {
1125 Py_INCREF(default_value);
1126 return default_value;
1127 }
1128
1129 for (i = 0; i < self->extra->length; i++) {
1130 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +00001131 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
1132
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001133 PyObject* text = element_get_text(item);
1134 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +00001135 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001136 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001137 return text;
1138 }
1139 }
1140
1141 Py_INCREF(default_value);
1142 return default_value;
1143}
1144
1145static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001146element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147{
1148 int i;
1149 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001150 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001151 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001152 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001153
Eli Bendersky737b1732012-05-29 06:02:56 +03001154 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1155 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001156 return NULL;
1157
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001158 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001159 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001160 return _PyObject_CallMethodId(
1161 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001162 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001163 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001164
1165 out = PyList_New(0);
1166 if (!out)
1167 return NULL;
1168
1169 if (!self->extra)
1170 return out;
1171
1172 for (i = 0; i < self->extra->length; i++) {
1173 PyObject* item = self->extra->children[i];
1174 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001175 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001176 if (PyList_Append(out, item) < 0) {
1177 Py_DECREF(out);
1178 return NULL;
1179 }
1180 }
1181 }
1182
1183 return out;
1184}
1185
1186static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001187element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001188{
1189 PyObject* tag;
1190 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001191 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001192 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001193
Eli Bendersky737b1732012-05-29 06:02:56 +03001194 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1195 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001196 return NULL;
1197
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001198 return _PyObject_CallMethodId(
1199 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001200 );
1201}
1202
1203static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001204element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001205{
1206 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001207 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001208
1209 PyObject* key;
1210 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001211
1212 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1213 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001214 return NULL;
1215
1216 if (!self->extra || self->extra->attrib == Py_None)
1217 value = default_value;
1218 else {
1219 value = PyDict_GetItem(self->extra->attrib, key);
1220 if (!value)
1221 value = default_value;
1222 }
1223
1224 Py_INCREF(value);
1225 return value;
1226}
1227
1228static PyObject*
1229element_getchildren(ElementObject* self, PyObject* args)
1230{
1231 int i;
1232 PyObject* list;
1233
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001234 /* FIXME: report as deprecated? */
1235
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001236 if (!PyArg_ParseTuple(args, ":getchildren"))
1237 return NULL;
1238
1239 if (!self->extra)
1240 return PyList_New(0);
1241
1242 list = PyList_New(self->extra->length);
1243 if (!list)
1244 return NULL;
1245
1246 for (i = 0; i < self->extra->length; i++) {
1247 PyObject* item = self->extra->children[i];
1248 Py_INCREF(item);
1249 PyList_SET_ITEM(list, i, item);
1250 }
1251
1252 return list;
1253}
1254
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001255
Eli Bendersky64d11e62012-06-15 07:42:50 +03001256static PyObject *
1257create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1258
1259
1260static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001261element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001262{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001263 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001264 static char* kwlist[] = {"tag", 0};
1265
1266 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001267 return NULL;
1268
Eli Bendersky64d11e62012-06-15 07:42:50 +03001269 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001270}
1271
1272
1273static PyObject*
1274element_itertext(ElementObject* self, PyObject* args)
1275{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001276 if (!PyArg_ParseTuple(args, ":itertext"))
1277 return NULL;
1278
Eli Bendersky64d11e62012-06-15 07:42:50 +03001279 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001280}
1281
Eli Bendersky64d11e62012-06-15 07:42:50 +03001282
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001283static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001284element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001285{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001286 ElementObject* self = (ElementObject*) self_;
1287
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001288 if (!self->extra || index < 0 || index >= self->extra->length) {
1289 PyErr_SetString(
1290 PyExc_IndexError,
1291 "child index out of range"
1292 );
1293 return NULL;
1294 }
1295
1296 Py_INCREF(self->extra->children[index]);
1297 return self->extra->children[index];
1298}
1299
1300static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001301element_insert(ElementObject* self, PyObject* args)
1302{
1303 int i;
1304
1305 int index;
1306 PyObject* element;
1307 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1308 &Element_Type, &element))
1309 return NULL;
1310
1311 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001312 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001313
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001314 if (index < 0) {
1315 index += self->extra->length;
1316 if (index < 0)
1317 index = 0;
1318 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001319 if (index > self->extra->length)
1320 index = self->extra->length;
1321
1322 if (element_resize(self, 1) < 0)
1323 return NULL;
1324
1325 for (i = self->extra->length; i > index; i--)
1326 self->extra->children[i] = self->extra->children[i-1];
1327
1328 Py_INCREF(element);
1329 self->extra->children[index] = element;
1330
1331 self->extra->length++;
1332
1333 Py_RETURN_NONE;
1334}
1335
1336static PyObject*
1337element_items(ElementObject* self, PyObject* args)
1338{
1339 if (!PyArg_ParseTuple(args, ":items"))
1340 return NULL;
1341
1342 if (!self->extra || self->extra->attrib == Py_None)
1343 return PyList_New(0);
1344
1345 return PyDict_Items(self->extra->attrib);
1346}
1347
1348static PyObject*
1349element_keys(ElementObject* self, PyObject* args)
1350{
1351 if (!PyArg_ParseTuple(args, ":keys"))
1352 return NULL;
1353
1354 if (!self->extra || self->extra->attrib == Py_None)
1355 return PyList_New(0);
1356
1357 return PyDict_Keys(self->extra->attrib);
1358}
1359
Martin v. Löwis18e16552006-02-15 17:27:45 +00001360static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001361element_length(ElementObject* self)
1362{
1363 if (!self->extra)
1364 return 0;
1365
1366 return self->extra->length;
1367}
1368
1369static PyObject*
1370element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1371{
1372 PyObject* elem;
1373
1374 PyObject* tag;
1375 PyObject* attrib;
1376 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1377 return NULL;
1378
1379 attrib = PyDict_Copy(attrib);
1380 if (!attrib)
1381 return NULL;
1382
Eli Bendersky092af1f2012-03-04 07:14:03 +02001383 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001384
1385 Py_DECREF(attrib);
1386
1387 return elem;
1388}
1389
1390static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001391element_remove(ElementObject* self, PyObject* args)
1392{
1393 int i;
1394
1395 PyObject* element;
1396 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1397 return NULL;
1398
1399 if (!self->extra) {
1400 /* element has no children, so raise exception */
1401 PyErr_SetString(
1402 PyExc_ValueError,
1403 "list.remove(x): x not in list"
1404 );
1405 return NULL;
1406 }
1407
1408 for (i = 0; i < self->extra->length; i++) {
1409 if (self->extra->children[i] == element)
1410 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001411 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001412 break;
1413 }
1414
1415 if (i == self->extra->length) {
1416 /* element is not in children, so raise exception */
1417 PyErr_SetString(
1418 PyExc_ValueError,
1419 "list.remove(x): x not in list"
1420 );
1421 return NULL;
1422 }
1423
1424 Py_DECREF(self->extra->children[i]);
1425
1426 self->extra->length--;
1427
1428 for (; i < self->extra->length; i++)
1429 self->extra->children[i] = self->extra->children[i+1];
1430
1431 Py_RETURN_NONE;
1432}
1433
1434static PyObject*
1435element_repr(ElementObject* self)
1436{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001437 if (self->tag)
1438 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1439 else
1440 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001441}
1442
1443static PyObject*
1444element_set(ElementObject* self, PyObject* args)
1445{
1446 PyObject* attrib;
1447
1448 PyObject* key;
1449 PyObject* value;
1450 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1451 return NULL;
1452
1453 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001454 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001455
1456 attrib = element_get_attrib(self);
1457 if (!attrib)
1458 return NULL;
1459
1460 if (PyDict_SetItem(attrib, key, value) < 0)
1461 return NULL;
1462
1463 Py_RETURN_NONE;
1464}
1465
1466static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001467element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001468{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001469 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001470 int i;
1471 PyObject* old;
1472
1473 if (!self->extra || index < 0 || index >= self->extra->length) {
1474 PyErr_SetString(
1475 PyExc_IndexError,
1476 "child assignment index out of range");
1477 return -1;
1478 }
1479
1480 old = self->extra->children[index];
1481
1482 if (item) {
1483 Py_INCREF(item);
1484 self->extra->children[index] = item;
1485 } else {
1486 self->extra->length--;
1487 for (i = index; i < self->extra->length; i++)
1488 self->extra->children[i] = self->extra->children[i+1];
1489 }
1490
1491 Py_DECREF(old);
1492
1493 return 0;
1494}
1495
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001496static PyObject*
1497element_subscr(PyObject* self_, PyObject* item)
1498{
1499 ElementObject* self = (ElementObject*) self_;
1500
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001501 if (PyIndex_Check(item)) {
1502 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001503
1504 if (i == -1 && PyErr_Occurred()) {
1505 return NULL;
1506 }
1507 if (i < 0 && self->extra)
1508 i += self->extra->length;
1509 return element_getitem(self_, i);
1510 }
1511 else if (PySlice_Check(item)) {
1512 Py_ssize_t start, stop, step, slicelen, cur, i;
1513 PyObject* list;
1514
1515 if (!self->extra)
1516 return PyList_New(0);
1517
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001518 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001519 self->extra->length,
1520 &start, &stop, &step, &slicelen) < 0) {
1521 return NULL;
1522 }
1523
1524 if (slicelen <= 0)
1525 return PyList_New(0);
1526 else {
1527 list = PyList_New(slicelen);
1528 if (!list)
1529 return NULL;
1530
1531 for (cur = start, i = 0; i < slicelen;
1532 cur += step, i++) {
1533 PyObject* item = self->extra->children[cur];
1534 Py_INCREF(item);
1535 PyList_SET_ITEM(list, i, item);
1536 }
1537
1538 return list;
1539 }
1540 }
1541 else {
1542 PyErr_SetString(PyExc_TypeError,
1543 "element indices must be integers");
1544 return NULL;
1545 }
1546}
1547
1548static int
1549element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1550{
1551 ElementObject* self = (ElementObject*) self_;
1552
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001553 if (PyIndex_Check(item)) {
1554 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001555
1556 if (i == -1 && PyErr_Occurred()) {
1557 return -1;
1558 }
1559 if (i < 0 && self->extra)
1560 i += self->extra->length;
1561 return element_setitem(self_, i, value);
1562 }
1563 else if (PySlice_Check(item)) {
1564 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1565
1566 PyObject* recycle = NULL;
1567 PyObject* seq = NULL;
1568
1569 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001570 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001571
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001572 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001573 self->extra->length,
1574 &start, &stop, &step, &slicelen) < 0) {
1575 return -1;
1576 }
1577
Eli Bendersky865756a2012-03-09 13:38:15 +02001578 if (value == NULL) {
1579 /* Delete slice */
1580 size_t cur;
1581 Py_ssize_t i;
1582
1583 if (slicelen <= 0)
1584 return 0;
1585
1586 /* Since we're deleting, the direction of the range doesn't matter,
1587 * so for simplicity make it always ascending.
1588 */
1589 if (step < 0) {
1590 stop = start + 1;
1591 start = stop + step * (slicelen - 1) - 1;
1592 step = -step;
1593 }
1594
1595 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1596
1597 /* recycle is a list that will contain all the children
1598 * scheduled for removal.
1599 */
1600 if (!(recycle = PyList_New(slicelen))) {
1601 PyErr_NoMemory();
1602 return -1;
1603 }
1604
1605 /* This loop walks over all the children that have to be deleted,
1606 * with cur pointing at them. num_moved is the amount of children
1607 * until the next deleted child that have to be "shifted down" to
1608 * occupy the deleted's places.
1609 * Note that in the ith iteration, shifting is done i+i places down
1610 * because i children were already removed.
1611 */
1612 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1613 /* Compute how many children have to be moved, clipping at the
1614 * list end.
1615 */
1616 Py_ssize_t num_moved = step - 1;
1617 if (cur + step >= (size_t)self->extra->length) {
1618 num_moved = self->extra->length - cur - 1;
1619 }
1620
1621 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1622
1623 memmove(
1624 self->extra->children + cur - i,
1625 self->extra->children + cur + 1,
1626 num_moved * sizeof(PyObject *));
1627 }
1628
1629 /* Leftover "tail" after the last removed child */
1630 cur = start + (size_t)slicelen * step;
1631 if (cur < (size_t)self->extra->length) {
1632 memmove(
1633 self->extra->children + cur - slicelen,
1634 self->extra->children + cur,
1635 (self->extra->length - cur) * sizeof(PyObject *));
1636 }
1637
1638 self->extra->length -= slicelen;
1639
1640 /* Discard the recycle list with all the deleted sub-elements */
1641 Py_XDECREF(recycle);
1642 return 0;
1643 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001644 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001645 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001646 seq = PySequence_Fast(value, "");
1647 if (!seq) {
1648 PyErr_Format(
1649 PyExc_TypeError,
1650 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1651 );
1652 return -1;
1653 }
1654 newlen = PySequence_Size(seq);
1655 }
1656
1657 if (step != 1 && newlen != slicelen)
1658 {
1659 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001660 "attempt to assign sequence of size %zd "
1661 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001662 newlen, slicelen
1663 );
1664 return -1;
1665 }
1666
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001667 /* Resize before creating the recycle bin, to prevent refleaks. */
1668 if (newlen > slicelen) {
1669 if (element_resize(self, newlen - slicelen) < 0) {
1670 if (seq) {
1671 Py_DECREF(seq);
1672 }
1673 return -1;
1674 }
1675 }
1676
1677 if (slicelen > 0) {
1678 /* to avoid recursive calls to this method (via decref), move
1679 old items to the recycle bin here, and get rid of them when
1680 we're done modifying the element */
1681 recycle = PyList_New(slicelen);
1682 if (!recycle) {
1683 if (seq) {
1684 Py_DECREF(seq);
1685 }
1686 return -1;
1687 }
1688 for (cur = start, i = 0; i < slicelen;
1689 cur += step, i++)
1690 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1691 }
1692
1693 if (newlen < slicelen) {
1694 /* delete slice */
1695 for (i = stop; i < self->extra->length; i++)
1696 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1697 } else if (newlen > slicelen) {
1698 /* insert slice */
1699 for (i = self->extra->length-1; i >= stop; i--)
1700 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1701 }
1702
1703 /* replace the slice */
1704 for (cur = start, i = 0; i < newlen;
1705 cur += step, i++) {
1706 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1707 Py_INCREF(element);
1708 self->extra->children[cur] = element;
1709 }
1710
1711 self->extra->length += newlen - slicelen;
1712
1713 if (seq) {
1714 Py_DECREF(seq);
1715 }
1716
1717 /* discard the recycle bin, and everything in it */
1718 Py_XDECREF(recycle);
1719
1720 return 0;
1721 }
1722 else {
1723 PyErr_SetString(PyExc_TypeError,
1724 "element indices must be integers");
1725 return -1;
1726 }
1727}
1728
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001729static PyMethodDef element_methods[] = {
1730
Eli Bendersky0192ba32012-03-30 16:38:33 +03001731 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001732
Eli Benderskya8736902013-01-05 06:26:39 -08001733 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001734 {"set", (PyCFunction) element_set, METH_VARARGS},
1735
Eli Bendersky737b1732012-05-29 06:02:56 +03001736 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1737 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1738 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001739
1740 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001741 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001742 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1743 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1744
Eli Benderskya8736902013-01-05 06:26:39 -08001745 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001746 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001747 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001748
Eli Benderskya8736902013-01-05 06:26:39 -08001749 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001750 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1751
1752 {"items", (PyCFunction) element_items, METH_VARARGS},
1753 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1754
1755 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1756
1757 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1758 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001759 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001760 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1761 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001762
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001763 {NULL, NULL}
1764};
1765
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001766static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001767element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001768{
1769 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001770 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001771
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001772 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001773 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001774
Alexander Belopolskye239d232010-12-08 23:31:48 +00001775 if (name == NULL)
1776 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001777
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001778 /* handle common attributes first */
1779 if (strcmp(name, "tag") == 0) {
1780 res = self->tag;
1781 Py_INCREF(res);
1782 return res;
1783 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001784 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001785 Py_INCREF(res);
1786 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001787 }
1788
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001789 /* methods */
1790 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1791 if (res)
1792 return res;
1793
1794 /* less common attributes */
1795 if (strcmp(name, "tail") == 0) {
1796 PyErr_Clear();
1797 res = element_get_tail(self);
1798 } else if (strcmp(name, "attrib") == 0) {
1799 PyErr_Clear();
1800 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001801 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001802 res = element_get_attrib(self);
1803 }
1804
1805 if (!res)
1806 return NULL;
1807
1808 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001809 return res;
1810}
1811
Eli Benderskyb20df952012-05-20 06:33:29 +03001812static PyObject*
1813element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001814{
Eli Benderskyb20df952012-05-20 06:33:29 +03001815 char *name = "";
1816 if (PyUnicode_Check(nameobj))
1817 name = _PyUnicode_AsString(nameobj);
1818
1819 if (name == NULL)
1820 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001821
1822 if (strcmp(name, "tag") == 0) {
1823 Py_DECREF(self->tag);
1824 self->tag = value;
1825 Py_INCREF(self->tag);
1826 } else if (strcmp(name, "text") == 0) {
1827 Py_DECREF(JOIN_OBJ(self->text));
1828 self->text = value;
1829 Py_INCREF(self->text);
1830 } else if (strcmp(name, "tail") == 0) {
1831 Py_DECREF(JOIN_OBJ(self->tail));
1832 self->tail = value;
1833 Py_INCREF(self->tail);
1834 } else if (strcmp(name, "attrib") == 0) {
1835 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001836 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001837 Py_DECREF(self->extra->attrib);
1838 self->extra->attrib = value;
1839 Py_INCREF(self->extra->attrib);
1840 } else {
1841 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001842 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001843 }
1844
Eli Benderskyb20df952012-05-20 06:33:29 +03001845 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001846}
1847
1848static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001849 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001850 0, /* sq_concat */
1851 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001852 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001853 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001854 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001855 0,
1856};
1857
1858static PyMappingMethods element_as_mapping = {
1859 (lenfunc) element_length,
1860 (binaryfunc) element_subscr,
1861 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001862};
1863
Neal Norwitz227b5332006-03-22 09:28:35 +00001864static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001865 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001866 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001867 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001868 (destructor)element_dealloc, /* tp_dealloc */
1869 0, /* tp_print */
1870 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001871 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001872 0, /* tp_reserved */
1873 (reprfunc)element_repr, /* tp_repr */
1874 0, /* tp_as_number */
1875 &element_as_sequence, /* tp_as_sequence */
1876 &element_as_mapping, /* tp_as_mapping */
1877 0, /* tp_hash */
1878 0, /* tp_call */
1879 0, /* tp_str */
1880 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001881 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001882 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001883 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1884 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001885 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001886 (traverseproc)element_gc_traverse, /* tp_traverse */
1887 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001888 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001889 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001890 0, /* tp_iter */
1891 0, /* tp_iternext */
1892 element_methods, /* tp_methods */
1893 0, /* tp_members */
1894 0, /* tp_getset */
1895 0, /* tp_base */
1896 0, /* tp_dict */
1897 0, /* tp_descr_get */
1898 0, /* tp_descr_set */
1899 0, /* tp_dictoffset */
1900 (initproc)element_init, /* tp_init */
1901 PyType_GenericAlloc, /* tp_alloc */
1902 element_new, /* tp_new */
1903 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001904};
1905
Eli Bendersky64d11e62012-06-15 07:42:50 +03001906/******************************* Element iterator ****************************/
1907
1908/* ElementIterObject represents the iteration state over an XML element in
1909 * pre-order traversal. To keep track of which sub-element should be returned
1910 * next, a stack of parents is maintained. This is a standard stack-based
1911 * iterative pre-order traversal of a tree.
1912 * The stack is managed using a single-linked list starting at parent_stack.
1913 * Each stack node contains the saved parent to which we should return after
1914 * the current one is exhausted, and the next child to examine in that parent.
1915 */
1916typedef struct ParentLocator_t {
1917 ElementObject *parent;
1918 Py_ssize_t child_index;
1919 struct ParentLocator_t *next;
1920} ParentLocator;
1921
1922typedef struct {
1923 PyObject_HEAD
1924 ParentLocator *parent_stack;
1925 ElementObject *root_element;
1926 PyObject *sought_tag;
1927 int root_done;
1928 int gettext;
1929} ElementIterObject;
1930
1931
1932static void
1933elementiter_dealloc(ElementIterObject *it)
1934{
1935 ParentLocator *p = it->parent_stack;
1936 while (p) {
1937 ParentLocator *temp = p;
1938 Py_XDECREF(p->parent);
1939 p = p->next;
1940 PyObject_Free(temp);
1941 }
1942
1943 Py_XDECREF(it->sought_tag);
1944 Py_XDECREF(it->root_element);
1945
1946 PyObject_GC_UnTrack(it);
1947 PyObject_GC_Del(it);
1948}
1949
1950static int
1951elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1952{
1953 ParentLocator *p = it->parent_stack;
1954 while (p) {
1955 Py_VISIT(p->parent);
1956 p = p->next;
1957 }
1958
1959 Py_VISIT(it->root_element);
1960 Py_VISIT(it->sought_tag);
1961 return 0;
1962}
1963
1964/* Helper function for elementiter_next. Add a new parent to the parent stack.
1965 */
1966static ParentLocator *
1967parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1968{
1969 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1970 if (new_node) {
1971 new_node->parent = parent;
1972 Py_INCREF(parent);
1973 new_node->child_index = 0;
1974 new_node->next = stack;
1975 }
1976 return new_node;
1977}
1978
1979static PyObject *
1980elementiter_next(ElementIterObject *it)
1981{
1982 /* Sub-element iterator.
1983 *
1984 * A short note on gettext: this function serves both the iter() and
1985 * itertext() methods to avoid code duplication. However, there are a few
1986 * small differences in the way these iterations work. Namely:
1987 * - itertext() only yields text from nodes that have it, and continues
1988 * iterating when a node doesn't have text (so it doesn't return any
1989 * node like iter())
1990 * - itertext() also has to handle tail, after finishing with all the
1991 * children of a node.
1992 */
Eli Bendersky113da642012-06-15 07:52:49 +03001993 ElementObject *cur_parent;
1994 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001995
1996 while (1) {
1997 /* Handle the case reached in the beginning and end of iteration, where
1998 * the parent stack is empty. The root_done flag gives us indication
1999 * whether we've just started iterating (so root_done is 0), in which
2000 * case the root is returned. If root_done is 1 and we're here, the
2001 * iterator is exhausted.
2002 */
2003 if (!it->parent_stack->parent) {
2004 if (it->root_done) {
2005 PyErr_SetNone(PyExc_StopIteration);
2006 return NULL;
2007 } else {
2008 it->parent_stack = parent_stack_push_new(it->parent_stack,
2009 it->root_element);
2010 if (!it->parent_stack) {
2011 PyErr_NoMemory();
2012 return NULL;
2013 }
2014
2015 it->root_done = 1;
2016 if (it->sought_tag == Py_None ||
2017 PyObject_RichCompareBool(it->root_element->tag,
2018 it->sought_tag, Py_EQ) == 1) {
2019 if (it->gettext) {
2020 PyObject *text = JOIN_OBJ(it->root_element->text);
2021 if (PyObject_IsTrue(text)) {
2022 Py_INCREF(text);
2023 return text;
2024 }
2025 } else {
2026 Py_INCREF(it->root_element);
2027 return (PyObject *)it->root_element;
2028 }
2029 }
2030 }
2031 }
2032
2033 /* See if there are children left to traverse in the current parent. If
2034 * yes, visit the next child. If not, pop the stack and try again.
2035 */
Eli Bendersky113da642012-06-15 07:52:49 +03002036 cur_parent = it->parent_stack->parent;
2037 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002038 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2039 ElementObject *child = (ElementObject *)
2040 cur_parent->extra->children[child_index];
2041 it->parent_stack->child_index++;
2042 it->parent_stack = parent_stack_push_new(it->parent_stack,
2043 child);
2044 if (!it->parent_stack) {
2045 PyErr_NoMemory();
2046 return NULL;
2047 }
2048
2049 if (it->gettext) {
2050 PyObject *text = JOIN_OBJ(child->text);
2051 if (PyObject_IsTrue(text)) {
2052 Py_INCREF(text);
2053 return text;
2054 }
2055 } else if (it->sought_tag == Py_None ||
2056 PyObject_RichCompareBool(child->tag,
2057 it->sought_tag, Py_EQ) == 1) {
2058 Py_INCREF(child);
2059 return (PyObject *)child;
2060 }
2061 else
2062 continue;
2063 }
2064 else {
2065 PyObject *tail = it->gettext ? JOIN_OBJ(cur_parent->tail) : Py_None;
2066 ParentLocator *next = it->parent_stack->next;
2067 Py_XDECREF(it->parent_stack->parent);
2068 PyObject_Free(it->parent_stack);
2069 it->parent_stack = next;
2070
2071 /* Note that extra condition on it->parent_stack->parent here;
2072 * this is because itertext() is supposed to only return *inner*
2073 * text, not text following the element it began iteration with.
2074 */
2075 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2076 Py_INCREF(tail);
2077 return tail;
2078 }
2079 }
2080 }
2081
2082 return NULL;
2083}
2084
2085
2086static PyTypeObject ElementIter_Type = {
2087 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002088 /* Using the module's name since the pure-Python implementation does not
2089 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002090 "_elementtree._element_iterator", /* tp_name */
2091 sizeof(ElementIterObject), /* tp_basicsize */
2092 0, /* tp_itemsize */
2093 /* methods */
2094 (destructor)elementiter_dealloc, /* tp_dealloc */
2095 0, /* tp_print */
2096 0, /* tp_getattr */
2097 0, /* tp_setattr */
2098 0, /* tp_reserved */
2099 0, /* tp_repr */
2100 0, /* tp_as_number */
2101 0, /* tp_as_sequence */
2102 0, /* tp_as_mapping */
2103 0, /* tp_hash */
2104 0, /* tp_call */
2105 0, /* tp_str */
2106 0, /* tp_getattro */
2107 0, /* tp_setattro */
2108 0, /* tp_as_buffer */
2109 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2110 0, /* tp_doc */
2111 (traverseproc)elementiter_traverse, /* tp_traverse */
2112 0, /* tp_clear */
2113 0, /* tp_richcompare */
2114 0, /* tp_weaklistoffset */
2115 PyObject_SelfIter, /* tp_iter */
2116 (iternextfunc)elementiter_next, /* tp_iternext */
2117 0, /* tp_methods */
2118 0, /* tp_members */
2119 0, /* tp_getset */
2120 0, /* tp_base */
2121 0, /* tp_dict */
2122 0, /* tp_descr_get */
2123 0, /* tp_descr_set */
2124 0, /* tp_dictoffset */
2125 0, /* tp_init */
2126 0, /* tp_alloc */
2127 0, /* tp_new */
2128};
2129
2130
2131static PyObject *
2132create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2133{
2134 ElementIterObject *it;
2135 PyObject *star = NULL;
2136
2137 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2138 if (!it)
2139 return NULL;
2140 if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
2141 PyObject_GC_Del(it);
2142 return NULL;
2143 }
2144
2145 it->parent_stack->parent = NULL;
2146 it->parent_stack->child_index = 0;
2147 it->parent_stack->next = NULL;
2148
2149 if (PyUnicode_Check(tag))
2150 star = PyUnicode_FromString("*");
2151 else if (PyBytes_Check(tag))
2152 star = PyBytes_FromString("*");
2153
2154 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2155 tag = Py_None;
2156
2157 Py_XDECREF(star);
2158 it->sought_tag = tag;
2159 it->root_done = 0;
2160 it->gettext = gettext;
2161 it->root_element = self;
2162
2163 Py_INCREF(self);
2164 Py_INCREF(tag);
2165
2166 PyObject_GC_Track(it);
2167 return (PyObject *)it;
2168}
2169
2170
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002171/* ==================================================================== */
2172/* the tree builder type */
2173
2174typedef struct {
2175 PyObject_HEAD
2176
Eli Bendersky58d548d2012-05-29 15:45:16 +03002177 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002178
Antoine Pitrouee329312012-10-04 19:53:29 +02002179 PyObject *this; /* current node */
2180 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002181
Eli Bendersky58d548d2012-05-29 15:45:16 +03002182 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002183
Eli Bendersky58d548d2012-05-29 15:45:16 +03002184 PyObject *stack; /* element stack */
2185 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002186
Eli Bendersky48d358b2012-05-30 17:57:50 +03002187 PyObject *element_factory;
2188
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002189 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002190 PyObject *events; /* list of events, or NULL if not collecting */
2191 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2192 PyObject *end_event_obj;
2193 PyObject *start_ns_event_obj;
2194 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002195} TreeBuilderObject;
2196
Neal Norwitz227b5332006-03-22 09:28:35 +00002197static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002198
Christian Heimes90aa7642007-12-19 02:45:37 +00002199#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002200
2201/* -------------------------------------------------------------------- */
2202/* constructor and destructor */
2203
Eli Bendersky58d548d2012-05-29 15:45:16 +03002204static PyObject *
2205treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002206{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002207 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2208 if (t != NULL) {
2209 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002210
Eli Bendersky58d548d2012-05-29 15:45:16 +03002211 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002212 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002213 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002214 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002215
Eli Bendersky58d548d2012-05-29 15:45:16 +03002216 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002217 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002218 t->stack = PyList_New(20);
2219 if (!t->stack) {
2220 Py_DECREF(t->this);
2221 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002222 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002223 return NULL;
2224 }
2225 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002226
Eli Bendersky58d548d2012-05-29 15:45:16 +03002227 t->events = NULL;
2228 t->start_event_obj = t->end_event_obj = NULL;
2229 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2230 }
2231 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002232}
2233
Eli Bendersky58d548d2012-05-29 15:45:16 +03002234static int
2235treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002236{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002237 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002238 PyObject *element_factory = NULL;
2239 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002240 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002241
2242 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2243 &element_factory)) {
2244 return -1;
2245 }
2246
2247 if (element_factory) {
2248 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002249 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002250 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002251 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002252 }
2253
Eli Bendersky58d548d2012-05-29 15:45:16 +03002254 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002255}
2256
Eli Bendersky48d358b2012-05-30 17:57:50 +03002257static int
2258treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2259{
2260 Py_VISIT(self->root);
2261 Py_VISIT(self->this);
2262 Py_VISIT(self->last);
2263 Py_VISIT(self->data);
2264 Py_VISIT(self->stack);
2265 Py_VISIT(self->element_factory);
2266 return 0;
2267}
2268
2269static int
2270treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002271{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002272 Py_CLEAR(self->end_ns_event_obj);
2273 Py_CLEAR(self->start_ns_event_obj);
2274 Py_CLEAR(self->end_event_obj);
2275 Py_CLEAR(self->start_event_obj);
2276 Py_CLEAR(self->events);
2277 Py_CLEAR(self->stack);
2278 Py_CLEAR(self->data);
2279 Py_CLEAR(self->last);
2280 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002281 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002282 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002283 return 0;
2284}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002285
Eli Bendersky48d358b2012-05-30 17:57:50 +03002286static void
2287treebuilder_dealloc(TreeBuilderObject *self)
2288{
2289 PyObject_GC_UnTrack(self);
2290 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002291 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002292}
2293
2294/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002295/* helpers for handling of arbitrary element-like objects */
2296
2297static int
2298treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2299 PyObject **dest, _Py_Identifier *name)
2300{
2301 if (Element_CheckExact(element)) {
2302 Py_DECREF(JOIN_OBJ(*dest));
2303 *dest = JOIN_SET(data, PyList_CheckExact(data));
2304 return 0;
2305 }
2306 else {
2307 PyObject *joined = list_join(data);
2308 int r;
2309 if (joined == NULL)
2310 return -1;
2311 r = _PyObject_SetAttrId(element, name, joined);
2312 Py_DECREF(joined);
2313 return r;
2314 }
2315}
2316
2317/* These two functions steal a reference to data */
2318static int
2319treebuilder_set_element_text(PyObject *element, PyObject *data)
2320{
2321 _Py_IDENTIFIER(text);
2322 return treebuilder_set_element_text_or_tail(
2323 element, data, &((ElementObject *) element)->text, &PyId_text);
2324}
2325
2326static int
2327treebuilder_set_element_tail(PyObject *element, PyObject *data)
2328{
2329 _Py_IDENTIFIER(tail);
2330 return treebuilder_set_element_text_or_tail(
2331 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2332}
2333
2334static int
2335treebuilder_add_subelement(PyObject *element, PyObject *child)
2336{
2337 _Py_IDENTIFIER(append);
2338 if (Element_CheckExact(element)) {
2339 ElementObject *elem = (ElementObject *) element;
2340 return element_add_subelement(elem, child);
2341 }
2342 else {
2343 PyObject *res;
2344 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2345 if (res == NULL)
2346 return -1;
2347 Py_DECREF(res);
2348 return 0;
2349 }
2350}
2351
2352/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002353/* handlers */
2354
2355LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002356treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2357 PyObject* attrib)
2358{
2359 PyObject* node;
2360 PyObject* this;
2361
2362 if (self->data) {
2363 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002364 if (treebuilder_set_element_text(self->last, self->data))
2365 return NULL;
2366 }
2367 else {
2368 if (treebuilder_set_element_tail(self->last, self->data))
2369 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002370 }
2371 self->data = NULL;
2372 }
2373
Eli Bendersky48d358b2012-05-30 17:57:50 +03002374 if (self->element_factory) {
2375 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2376 } else {
2377 node = create_new_element(tag, attrib);
2378 }
2379 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002380 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002381 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002382
Antoine Pitrouee329312012-10-04 19:53:29 +02002383 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002384
2385 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002386 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002387 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002388 } else {
2389 if (self->root) {
2390 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002391 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002392 "multiple elements on top level"
2393 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002394 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002395 }
2396 Py_INCREF(node);
2397 self->root = node;
2398 }
2399
2400 if (self->index < PyList_GET_SIZE(self->stack)) {
2401 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002402 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002403 Py_INCREF(this);
2404 } else {
2405 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002406 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002407 }
2408 self->index++;
2409
2410 Py_DECREF(this);
2411 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002412 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002413
2414 Py_DECREF(self->last);
2415 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002416 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002417
2418 if (self->start_event_obj) {
2419 PyObject* res;
2420 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002421 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002422 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002423 PyList_Append(self->events, res);
2424 Py_DECREF(res);
2425 } else
2426 PyErr_Clear(); /* FIXME: propagate error */
2427 }
2428
2429 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002430
2431 error:
2432 Py_DECREF(node);
2433 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002434}
2435
2436LOCAL(PyObject*)
2437treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2438{
2439 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002440 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002441 /* ignore calls to data before the first call to start */
2442 Py_RETURN_NONE;
2443 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002444 /* store the first item as is */
2445 Py_INCREF(data); self->data = data;
2446 } else {
2447 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002448 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2449 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002450 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002451 /* expat often generates single character data sections; handle
2452 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002453 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2454 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002455 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002456 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002457 } else if (PyList_CheckExact(self->data)) {
2458 if (PyList_Append(self->data, data) < 0)
2459 return NULL;
2460 } else {
2461 PyObject* list = PyList_New(2);
2462 if (!list)
2463 return NULL;
2464 PyList_SET_ITEM(list, 0, self->data);
2465 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2466 self->data = list;
2467 }
2468 }
2469
2470 Py_RETURN_NONE;
2471}
2472
2473LOCAL(PyObject*)
2474treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2475{
2476 PyObject* item;
2477
2478 if (self->data) {
2479 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002480 if (treebuilder_set_element_text(self->last, self->data))
2481 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002482 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002483 if (treebuilder_set_element_tail(self->last, self->data))
2484 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002485 }
2486 self->data = NULL;
2487 }
2488
2489 if (self->index == 0) {
2490 PyErr_SetString(
2491 PyExc_IndexError,
2492 "pop from empty stack"
2493 );
2494 return NULL;
2495 }
2496
2497 self->index--;
2498
2499 item = PyList_GET_ITEM(self->stack, self->index);
2500 Py_INCREF(item);
2501
2502 Py_DECREF(self->last);
2503
Antoine Pitrouee329312012-10-04 19:53:29 +02002504 self->last = self->this;
2505 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002506
2507 if (self->end_event_obj) {
2508 PyObject* res;
2509 PyObject* action = self->end_event_obj;
2510 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002511 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002512 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002513 PyList_Append(self->events, res);
2514 Py_DECREF(res);
2515 } else
2516 PyErr_Clear(); /* FIXME: propagate error */
2517 }
2518
2519 Py_INCREF(self->last);
2520 return (PyObject*) self->last;
2521}
2522
2523LOCAL(void)
2524treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002525 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002526{
2527 PyObject* res;
2528 PyObject* action;
2529 PyObject* parcel;
2530
2531 if (!self->events)
2532 return;
2533
2534 if (start) {
2535 if (!self->start_ns_event_obj)
2536 return;
2537 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002538 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002539 if (!parcel)
2540 return;
2541 Py_INCREF(action);
2542 } else {
2543 if (!self->end_ns_event_obj)
2544 return;
2545 action = self->end_ns_event_obj;
2546 Py_INCREF(action);
2547 parcel = Py_None;
2548 Py_INCREF(parcel);
2549 }
2550
2551 res = PyTuple_New(2);
2552
2553 if (res) {
2554 PyTuple_SET_ITEM(res, 0, action);
2555 PyTuple_SET_ITEM(res, 1, parcel);
2556 PyList_Append(self->events, res);
2557 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002558 }
2559 else {
2560 Py_DECREF(action);
2561 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002562 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002563 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002564}
2565
2566/* -------------------------------------------------------------------- */
2567/* methods (in alphabetical order) */
2568
2569static PyObject*
2570treebuilder_data(TreeBuilderObject* self, PyObject* args)
2571{
2572 PyObject* data;
2573 if (!PyArg_ParseTuple(args, "O:data", &data))
2574 return NULL;
2575
2576 return treebuilder_handle_data(self, data);
2577}
2578
2579static PyObject*
2580treebuilder_end(TreeBuilderObject* self, PyObject* args)
2581{
2582 PyObject* tag;
2583 if (!PyArg_ParseTuple(args, "O:end", &tag))
2584 return NULL;
2585
2586 return treebuilder_handle_end(self, tag);
2587}
2588
2589LOCAL(PyObject*)
2590treebuilder_done(TreeBuilderObject* self)
2591{
2592 PyObject* res;
2593
2594 /* FIXME: check stack size? */
2595
2596 if (self->root)
2597 res = self->root;
2598 else
2599 res = Py_None;
2600
2601 Py_INCREF(res);
2602 return res;
2603}
2604
2605static PyObject*
2606treebuilder_close(TreeBuilderObject* self, PyObject* args)
2607{
2608 if (!PyArg_ParseTuple(args, ":close"))
2609 return NULL;
2610
2611 return treebuilder_done(self);
2612}
2613
2614static PyObject*
2615treebuilder_start(TreeBuilderObject* self, PyObject* args)
2616{
2617 PyObject* tag;
2618 PyObject* attrib = Py_None;
2619 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2620 return NULL;
2621
2622 return treebuilder_handle_start(self, tag, attrib);
2623}
2624
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002625static PyMethodDef treebuilder_methods[] = {
2626 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2627 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2628 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002629 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2630 {NULL, NULL}
2631};
2632
Neal Norwitz227b5332006-03-22 09:28:35 +00002633static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002634 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002635 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002636 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002637 (destructor)treebuilder_dealloc, /* tp_dealloc */
2638 0, /* tp_print */
2639 0, /* tp_getattr */
2640 0, /* tp_setattr */
2641 0, /* tp_reserved */
2642 0, /* tp_repr */
2643 0, /* tp_as_number */
2644 0, /* tp_as_sequence */
2645 0, /* tp_as_mapping */
2646 0, /* tp_hash */
2647 0, /* tp_call */
2648 0, /* tp_str */
2649 0, /* tp_getattro */
2650 0, /* tp_setattro */
2651 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002652 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2653 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002654 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002655 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2656 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002657 0, /* tp_richcompare */
2658 0, /* tp_weaklistoffset */
2659 0, /* tp_iter */
2660 0, /* tp_iternext */
2661 treebuilder_methods, /* tp_methods */
2662 0, /* tp_members */
2663 0, /* tp_getset */
2664 0, /* tp_base */
2665 0, /* tp_dict */
2666 0, /* tp_descr_get */
2667 0, /* tp_descr_set */
2668 0, /* tp_dictoffset */
2669 (initproc)treebuilder_init, /* tp_init */
2670 PyType_GenericAlloc, /* tp_alloc */
2671 treebuilder_new, /* tp_new */
2672 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002673};
2674
2675/* ==================================================================== */
2676/* the expat interface */
2677
2678#if defined(USE_EXPAT)
2679
2680#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002681#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002682static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002683#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002684
Eli Bendersky52467b12012-06-01 07:13:08 +03002685static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2686 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2687
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002688typedef struct {
2689 PyObject_HEAD
2690
2691 XML_Parser parser;
2692
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002693 PyObject *target;
2694 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002695
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002696 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002697
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002698 PyObject *handle_start;
2699 PyObject *handle_data;
2700 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002701
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002702 PyObject *handle_comment;
2703 PyObject *handle_pi;
2704 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002705
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002706 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002707
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002708} XMLParserObject;
2709
Neal Norwitz227b5332006-03-22 09:28:35 +00002710static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002712#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2713
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002714/* helpers */
2715
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716LOCAL(PyObject*)
2717makeuniversal(XMLParserObject* self, const char* string)
2718{
2719 /* convert a UTF-8 tag/attribute name from the expat parser
2720 to a universal name string */
2721
Antoine Pitrouc1948842012-10-01 23:40:37 +02002722 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002723 PyObject* key;
2724 PyObject* value;
2725
2726 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002727 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002728 if (!key)
2729 return NULL;
2730
2731 value = PyDict_GetItem(self->names, key);
2732
2733 if (value) {
2734 Py_INCREF(value);
2735 } else {
2736 /* new name. convert to universal name, and decode as
2737 necessary */
2738
2739 PyObject* tag;
2740 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002741 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742
2743 /* look for namespace separator */
2744 for (i = 0; i < size; i++)
2745 if (string[i] == '}')
2746 break;
2747 if (i != size) {
2748 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002749 tag = PyBytes_FromStringAndSize(NULL, size+1);
2750 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002751 p[0] = '{';
2752 memcpy(p+1, string, size);
2753 size++;
2754 } else {
2755 /* plain name; use key as tag */
2756 Py_INCREF(key);
2757 tag = key;
2758 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002759
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002760 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002761 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002762 value = PyUnicode_DecodeUTF8(p, size, "strict");
2763 Py_DECREF(tag);
2764 if (!value) {
2765 Py_DECREF(key);
2766 return NULL;
2767 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002768
2769 /* add to names dictionary */
2770 if (PyDict_SetItem(self->names, key, value) < 0) {
2771 Py_DECREF(key);
2772 Py_DECREF(value);
2773 return NULL;
2774 }
2775 }
2776
2777 Py_DECREF(key);
2778 return value;
2779}
2780
Eli Bendersky5b77d812012-03-16 08:20:05 +02002781/* Set the ParseError exception with the given parameters.
2782 * If message is not NULL, it's used as the error string. Otherwise, the
2783 * message string is the default for the given error_code.
2784*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002785static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002786expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002787{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002788 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002789
Victor Stinner499dfcf2011-03-21 13:26:24 +01002790 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002791 message ? message : EXPAT(ErrorString)(error_code),
2792 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002793 if (errmsg == NULL)
2794 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002795
Victor Stinner499dfcf2011-03-21 13:26:24 +01002796 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2797 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002798 if (!error)
2799 return;
2800
Eli Bendersky5b77d812012-03-16 08:20:05 +02002801 /* Add code and position attributes */
2802 code = PyLong_FromLong((long)error_code);
2803 if (!code) {
2804 Py_DECREF(error);
2805 return;
2806 }
2807 if (PyObject_SetAttrString(error, "code", code) == -1) {
2808 Py_DECREF(error);
2809 Py_DECREF(code);
2810 return;
2811 }
2812 Py_DECREF(code);
2813
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002814 position = Py_BuildValue("(ii)", line, column);
2815 if (!position) {
2816 Py_DECREF(error);
2817 return;
2818 }
2819 if (PyObject_SetAttrString(error, "position", position) == -1) {
2820 Py_DECREF(error);
2821 Py_DECREF(position);
2822 return;
2823 }
2824 Py_DECREF(position);
2825
2826 PyErr_SetObject(elementtree_parseerror_obj, error);
2827 Py_DECREF(error);
2828}
2829
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002830/* -------------------------------------------------------------------- */
2831/* handlers */
2832
2833static void
2834expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2835 int data_len)
2836{
2837 PyObject* key;
2838 PyObject* value;
2839 PyObject* res;
2840
2841 if (data_len < 2 || data_in[0] != '&')
2842 return;
2843
Neal Norwitz0269b912007-08-08 06:56:02 +00002844 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002845 if (!key)
2846 return;
2847
2848 value = PyDict_GetItem(self->entity, key);
2849
2850 if (value) {
2851 if (TreeBuilder_CheckExact(self->target))
2852 res = treebuilder_handle_data(
2853 (TreeBuilderObject*) self->target, value
2854 );
2855 else if (self->handle_data)
2856 res = PyObject_CallFunction(self->handle_data, "O", value);
2857 else
2858 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002859 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002860 } else if (!PyErr_Occurred()) {
2861 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002862 char message[128] = "undefined entity ";
2863 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002864 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002865 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002866 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002867 EXPAT(GetErrorColumnNumber)(self->parser),
2868 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002869 );
2870 }
2871
2872 Py_DECREF(key);
2873}
2874
2875static void
2876expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2877 const XML_Char **attrib_in)
2878{
2879 PyObject* res;
2880 PyObject* tag;
2881 PyObject* attrib;
2882 int ok;
2883
2884 /* tag name */
2885 tag = makeuniversal(self, tag_in);
2886 if (!tag)
2887 return; /* parser will look for errors */
2888
2889 /* attributes */
2890 if (attrib_in[0]) {
2891 attrib = PyDict_New();
2892 if (!attrib)
2893 return;
2894 while (attrib_in[0] && attrib_in[1]) {
2895 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002896 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002897 if (!key || !value) {
2898 Py_XDECREF(value);
2899 Py_XDECREF(key);
2900 Py_DECREF(attrib);
2901 return;
2902 }
2903 ok = PyDict_SetItem(attrib, key, value);
2904 Py_DECREF(value);
2905 Py_DECREF(key);
2906 if (ok < 0) {
2907 Py_DECREF(attrib);
2908 return;
2909 }
2910 attrib_in += 2;
2911 }
2912 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002913 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002914 attrib = PyDict_New();
2915 if (!attrib)
2916 return;
2917 }
2918
2919 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002920 /* shortcut */
2921 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2922 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002923 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002924 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002925 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002926 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002927 res = NULL;
2928
2929 Py_DECREF(tag);
2930 Py_DECREF(attrib);
2931
2932 Py_XDECREF(res);
2933}
2934
2935static void
2936expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2937 int data_len)
2938{
2939 PyObject* data;
2940 PyObject* res;
2941
Neal Norwitz0269b912007-08-08 06:56:02 +00002942 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002943 if (!data)
2944 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002945
2946 if (TreeBuilder_CheckExact(self->target))
2947 /* shortcut */
2948 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2949 else if (self->handle_data)
2950 res = PyObject_CallFunction(self->handle_data, "O", data);
2951 else
2952 res = NULL;
2953
2954 Py_DECREF(data);
2955
2956 Py_XDECREF(res);
2957}
2958
2959static void
2960expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2961{
2962 PyObject* tag;
2963 PyObject* res = NULL;
2964
2965 if (TreeBuilder_CheckExact(self->target))
2966 /* shortcut */
2967 /* the standard tree builder doesn't look at the end tag */
2968 res = treebuilder_handle_end(
2969 (TreeBuilderObject*) self->target, Py_None
2970 );
2971 else if (self->handle_end) {
2972 tag = makeuniversal(self, tag_in);
2973 if (tag) {
2974 res = PyObject_CallFunction(self->handle_end, "O", tag);
2975 Py_DECREF(tag);
2976 }
2977 }
2978
2979 Py_XDECREF(res);
2980}
2981
2982static void
2983expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2984 const XML_Char *uri)
2985{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002986 PyObject* sprefix = NULL;
2987 PyObject* suri = NULL;
2988
2989 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2990 if (!suri)
2991 return;
2992
2993 if (prefix)
2994 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2995 else
2996 sprefix = PyUnicode_FromString("");
2997 if (!sprefix) {
2998 Py_DECREF(suri);
2999 return;
3000 }
3001
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003002 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003003 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003004 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003005
3006 Py_DECREF(sprefix);
3007 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008}
3009
3010static void
3011expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3012{
3013 treebuilder_handle_namespace(
3014 (TreeBuilderObject*) self->target, 0, NULL, NULL
3015 );
3016}
3017
3018static void
3019expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3020{
3021 PyObject* comment;
3022 PyObject* res;
3023
3024 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003025 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003026 if (comment) {
3027 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3028 Py_XDECREF(res);
3029 Py_DECREF(comment);
3030 }
3031 }
3032}
3033
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003034static void
3035expat_start_doctype_handler(XMLParserObject *self,
3036 const XML_Char *doctype_name,
3037 const XML_Char *sysid,
3038 const XML_Char *pubid,
3039 int has_internal_subset)
3040{
3041 PyObject *self_pyobj = (PyObject *)self;
3042 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3043 PyObject *parser_doctype = NULL;
3044 PyObject *res = NULL;
3045
3046 doctype_name_obj = makeuniversal(self, doctype_name);
3047 if (!doctype_name_obj)
3048 return;
3049
3050 if (sysid) {
3051 sysid_obj = makeuniversal(self, sysid);
3052 if (!sysid_obj) {
3053 Py_DECREF(doctype_name_obj);
3054 return;
3055 }
3056 } else {
3057 Py_INCREF(Py_None);
3058 sysid_obj = Py_None;
3059 }
3060
3061 if (pubid) {
3062 pubid_obj = makeuniversal(self, pubid);
3063 if (!pubid_obj) {
3064 Py_DECREF(doctype_name_obj);
3065 Py_DECREF(sysid_obj);
3066 return;
3067 }
3068 } else {
3069 Py_INCREF(Py_None);
3070 pubid_obj = Py_None;
3071 }
3072
3073 /* If the target has a handler for doctype, call it. */
3074 if (self->handle_doctype) {
3075 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3076 doctype_name_obj, pubid_obj, sysid_obj);
3077 Py_CLEAR(res);
3078 }
3079
3080 /* Now see if the parser itself has a doctype method. If yes and it's
3081 * a subclass, call it but warn about deprecation. If it's not a subclass
3082 * (i.e. vanilla XMLParser), do nothing.
3083 */
3084 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3085 if (parser_doctype) {
3086 if (!XMLParser_CheckExact(self_pyobj)) {
3087 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3088 "This method of XMLParser is deprecated. Define"
3089 " doctype() method on the TreeBuilder target.",
3090 1) < 0) {
3091 goto clear;
3092 }
3093 res = PyObject_CallFunction(parser_doctype, "OOO",
3094 doctype_name_obj, pubid_obj, sysid_obj);
3095 Py_CLEAR(res);
3096 }
3097 }
3098
3099clear:
3100 Py_XDECREF(parser_doctype);
3101 Py_DECREF(doctype_name_obj);
3102 Py_DECREF(pubid_obj);
3103 Py_DECREF(sysid_obj);
3104}
3105
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003106static void
3107expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3108 const XML_Char* data_in)
3109{
3110 PyObject* target;
3111 PyObject* data;
3112 PyObject* res;
3113
3114 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003115 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3116 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003117 if (target && data) {
3118 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3119 Py_XDECREF(res);
3120 Py_DECREF(data);
3121 Py_DECREF(target);
3122 } else {
3123 Py_XDECREF(data);
3124 Py_XDECREF(target);
3125 }
3126 }
3127}
3128
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003129static int
3130expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
3131 XML_Encoding *info)
3132{
3133 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003134 unsigned char s[256];
3135 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003136 void *data;
3137 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003138
3139 memset(info, 0, sizeof(XML_Encoding));
3140
3141 for (i = 0; i < 256; i++)
3142 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003143
Fredrik Lundhc3389992005-12-25 11:40:19 +00003144 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003145 if (!u)
3146 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003147 if (PyUnicode_READY(u))
3148 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003149
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003150 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003151 Py_DECREF(u);
3152 return XML_STATUS_ERROR;
3153 }
3154
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003155 kind = PyUnicode_KIND(u);
3156 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003157 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003158 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
3159 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
3160 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003161 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003162 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003163 }
3164
3165 Py_DECREF(u);
3166
3167 return XML_STATUS_OK;
3168}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003169
3170/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003171
Eli Bendersky52467b12012-06-01 07:13:08 +03003172static PyObject *
3173xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003174{
Eli Bendersky52467b12012-06-01 07:13:08 +03003175 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3176 if (self) {
3177 self->parser = NULL;
3178 self->target = self->entity = self->names = NULL;
3179 self->handle_start = self->handle_data = self->handle_end = NULL;
3180 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003181 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003182 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003183 return (PyObject *)self;
3184}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003185
Eli Bendersky52467b12012-06-01 07:13:08 +03003186static int
3187xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3188{
3189 XMLParserObject *self_xp = (XMLParserObject *)self;
3190 PyObject *target = NULL, *html = NULL;
3191 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003192 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003193
Eli Bendersky52467b12012-06-01 07:13:08 +03003194 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3195 &html, &target, &encoding)) {
3196 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003197 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003198
Eli Bendersky52467b12012-06-01 07:13:08 +03003199 self_xp->entity = PyDict_New();
3200 if (!self_xp->entity)
3201 return -1;
3202
3203 self_xp->names = PyDict_New();
3204 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003205 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003206 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003207 }
3208
Eli Bendersky52467b12012-06-01 07:13:08 +03003209 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3210 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003211 Py_CLEAR(self_xp->entity);
3212 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003213 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003214 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003215 }
3216
Eli Bendersky52467b12012-06-01 07:13:08 +03003217 if (target) {
3218 Py_INCREF(target);
3219 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003220 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003221 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003222 Py_CLEAR(self_xp->entity);
3223 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003224 EXPAT(ParserFree)(self_xp->parser);
3225 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003226 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003227 }
3228 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003229
Eli Bendersky52467b12012-06-01 07:13:08 +03003230 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3231 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3232 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3233 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3234 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3235 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003236 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003237
3238 PyErr_Clear();
Eli Bendersky52467b12012-06-01 07:13:08 +03003239
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003240 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003241 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003242 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003243 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003244 (XML_StartElementHandler) expat_start_handler,
3245 (XML_EndElementHandler) expat_end_handler
3246 );
3247 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003248 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003249 (XML_DefaultHandler) expat_default_handler
3250 );
3251 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003252 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003253 (XML_CharacterDataHandler) expat_data_handler
3254 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003255 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003256 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003257 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003258 (XML_CommentHandler) expat_comment_handler
3259 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003260 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003261 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003262 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003263 (XML_ProcessingInstructionHandler) expat_pi_handler
3264 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003265 EXPAT(SetStartDoctypeDeclHandler)(
3266 self_xp->parser,
3267 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3268 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003269 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003270 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
3272 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273
Eli Bendersky52467b12012-06-01 07:13:08 +03003274 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003275}
3276
Eli Bendersky52467b12012-06-01 07:13:08 +03003277static int
3278xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3279{
3280 Py_VISIT(self->handle_close);
3281 Py_VISIT(self->handle_pi);
3282 Py_VISIT(self->handle_comment);
3283 Py_VISIT(self->handle_end);
3284 Py_VISIT(self->handle_data);
3285 Py_VISIT(self->handle_start);
3286
3287 Py_VISIT(self->target);
3288 Py_VISIT(self->entity);
3289 Py_VISIT(self->names);
3290
3291 return 0;
3292}
3293
3294static int
3295xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003296{
3297 EXPAT(ParserFree)(self->parser);
3298
Antoine Pitrouc1948842012-10-01 23:40:37 +02003299 Py_CLEAR(self->handle_close);
3300 Py_CLEAR(self->handle_pi);
3301 Py_CLEAR(self->handle_comment);
3302 Py_CLEAR(self->handle_end);
3303 Py_CLEAR(self->handle_data);
3304 Py_CLEAR(self->handle_start);
3305 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003306
Antoine Pitrouc1948842012-10-01 23:40:37 +02003307 Py_CLEAR(self->target);
3308 Py_CLEAR(self->entity);
3309 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003310
Eli Bendersky52467b12012-06-01 07:13:08 +03003311 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003312}
3313
Eli Bendersky52467b12012-06-01 07:13:08 +03003314static void
3315xmlparser_dealloc(XMLParserObject* self)
3316{
3317 PyObject_GC_UnTrack(self);
3318 xmlparser_gc_clear(self);
3319 Py_TYPE(self)->tp_free((PyObject *)self);
3320}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003321
3322LOCAL(PyObject*)
3323expat_parse(XMLParserObject* self, char* data, int data_len, int final)
3324{
3325 int ok;
3326
3327 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3328
3329 if (PyErr_Occurred())
3330 return NULL;
3331
3332 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003333 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003334 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003335 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003336 EXPAT(GetErrorColumnNumber)(self->parser),
3337 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003338 );
3339 return NULL;
3340 }
3341
3342 Py_RETURN_NONE;
3343}
3344
3345static PyObject*
3346xmlparser_close(XMLParserObject* self, PyObject* args)
3347{
3348 /* end feeding data to parser */
3349
3350 PyObject* res;
3351 if (!PyArg_ParseTuple(args, ":close"))
3352 return NULL;
3353
3354 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003355 if (!res)
3356 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003357
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003358 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003359 Py_DECREF(res);
3360 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003361 } if (self->handle_close) {
3362 Py_DECREF(res);
3363 return PyObject_CallFunction(self->handle_close, "");
3364 } else
3365 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003366}
3367
3368static PyObject*
3369xmlparser_feed(XMLParserObject* self, PyObject* args)
3370{
3371 /* feed data to parser */
3372
3373 char* data;
3374 int data_len;
3375 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
3376 return NULL;
3377
3378 return expat_parse(self, data, data_len, 0);
3379}
3380
3381static PyObject*
3382xmlparser_parse(XMLParserObject* self, PyObject* args)
3383{
3384 /* (internal) parse until end of input stream */
3385
3386 PyObject* reader;
3387 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003388 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003389 PyObject* res;
3390
3391 PyObject* fileobj;
3392 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3393 return NULL;
3394
3395 reader = PyObject_GetAttrString(fileobj, "read");
3396 if (!reader)
3397 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003398
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003399 /* read from open file object */
3400 for (;;) {
3401
3402 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3403
3404 if (!buffer) {
3405 /* read failed (e.g. due to KeyboardInterrupt) */
3406 Py_DECREF(reader);
3407 return NULL;
3408 }
3409
Eli Benderskyf996e772012-03-16 05:53:30 +02003410 if (PyUnicode_CheckExact(buffer)) {
3411 /* A unicode object is encoded into bytes using UTF-8 */
3412 if (PyUnicode_GET_SIZE(buffer) == 0) {
3413 Py_DECREF(buffer);
3414 break;
3415 }
3416 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003417 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003418 if (!temp) {
3419 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003420 Py_DECREF(reader);
3421 return NULL;
3422 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003423 buffer = temp;
3424 }
3425 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003426 Py_DECREF(buffer);
3427 break;
3428 }
3429
3430 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003431 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003432 );
3433
3434 Py_DECREF(buffer);
3435
3436 if (!res) {
3437 Py_DECREF(reader);
3438 return NULL;
3439 }
3440 Py_DECREF(res);
3441
3442 }
3443
3444 Py_DECREF(reader);
3445
3446 res = expat_parse(self, "", 0, 1);
3447
3448 if (res && TreeBuilder_CheckExact(self->target)) {
3449 Py_DECREF(res);
3450 return treebuilder_done((TreeBuilderObject*) self->target);
3451 }
3452
3453 return res;
3454}
3455
3456static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003457xmlparser_doctype(XMLParserObject *self, PyObject *args)
3458{
3459 Py_RETURN_NONE;
3460}
3461
3462static PyObject*
3463xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003464{
3465 /* activate element event reporting */
3466
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003467 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003468 TreeBuilderObject* target;
3469
3470 PyObject* events; /* event collector */
3471 PyObject* event_set = Py_None;
3472 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
3473 &event_set))
3474 return NULL;
3475
3476 if (!TreeBuilder_CheckExact(self->target)) {
3477 PyErr_SetString(
3478 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003479 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003480 "targets"
3481 );
3482 return NULL;
3483 }
3484
3485 target = (TreeBuilderObject*) self->target;
3486
3487 Py_INCREF(events);
3488 Py_XDECREF(target->events);
3489 target->events = events;
3490
3491 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003492 Py_CLEAR(target->start_event_obj);
3493 Py_CLEAR(target->end_event_obj);
3494 Py_CLEAR(target->start_ns_event_obj);
3495 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003496
3497 if (event_set == Py_None) {
3498 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003499 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003500 Py_RETURN_NONE;
3501 }
3502
3503 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3504 goto error;
3505
3506 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3507 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3508 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003509 if (PyUnicode_Check(item)) {
3510 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003511 if (event == NULL)
3512 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003513 } else if (PyBytes_Check(item))
3514 event = PyBytes_AS_STRING(item);
3515 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003516 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003517 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003518 if (strcmp(event, "start") == 0) {
3519 Py_INCREF(item);
3520 target->start_event_obj = item;
3521 } else if (strcmp(event, "end") == 0) {
3522 Py_INCREF(item);
3523 Py_XDECREF(target->end_event_obj);
3524 target->end_event_obj = item;
3525 } else if (strcmp(event, "start-ns") == 0) {
3526 Py_INCREF(item);
3527 Py_XDECREF(target->start_ns_event_obj);
3528 target->start_ns_event_obj = item;
3529 EXPAT(SetNamespaceDeclHandler)(
3530 self->parser,
3531 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3532 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3533 );
3534 } else if (strcmp(event, "end-ns") == 0) {
3535 Py_INCREF(item);
3536 Py_XDECREF(target->end_ns_event_obj);
3537 target->end_ns_event_obj = item;
3538 EXPAT(SetNamespaceDeclHandler)(
3539 self->parser,
3540 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3541 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3542 );
3543 } else {
3544 PyErr_Format(
3545 PyExc_ValueError,
3546 "unknown event '%s'", event
3547 );
3548 return NULL;
3549 }
3550 }
3551
3552 Py_RETURN_NONE;
3553
3554 error:
3555 PyErr_SetString(
3556 PyExc_TypeError,
3557 "invalid event tuple"
3558 );
3559 return NULL;
3560}
3561
3562static PyMethodDef xmlparser_methods[] = {
3563 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
3564 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3565 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3566 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003567 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003568 {NULL, NULL}
3569};
3570
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003571static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003572xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003573{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003574 if (PyUnicode_Check(nameobj)) {
3575 PyObject* res;
3576 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3577 res = self->entity;
3578 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3579 res = self->target;
3580 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3581 return PyUnicode_FromFormat(
3582 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003583 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003584 }
3585 else
3586 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003587
Alexander Belopolskye239d232010-12-08 23:31:48 +00003588 Py_INCREF(res);
3589 return res;
3590 }
3591 generic:
3592 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003593}
3594
Neal Norwitz227b5332006-03-22 09:28:35 +00003595static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003596 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003597 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003598 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003599 (destructor)xmlparser_dealloc, /* tp_dealloc */
3600 0, /* tp_print */
3601 0, /* tp_getattr */
3602 0, /* tp_setattr */
3603 0, /* tp_reserved */
3604 0, /* tp_repr */
3605 0, /* tp_as_number */
3606 0, /* tp_as_sequence */
3607 0, /* tp_as_mapping */
3608 0, /* tp_hash */
3609 0, /* tp_call */
3610 0, /* tp_str */
3611 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3612 0, /* tp_setattro */
3613 0, /* tp_as_buffer */
3614 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3615 /* tp_flags */
3616 0, /* tp_doc */
3617 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3618 (inquiry)xmlparser_gc_clear, /* tp_clear */
3619 0, /* tp_richcompare */
3620 0, /* tp_weaklistoffset */
3621 0, /* tp_iter */
3622 0, /* tp_iternext */
3623 xmlparser_methods, /* tp_methods */
3624 0, /* tp_members */
3625 0, /* tp_getset */
3626 0, /* tp_base */
3627 0, /* tp_dict */
3628 0, /* tp_descr_get */
3629 0, /* tp_descr_set */
3630 0, /* tp_dictoffset */
3631 (initproc)xmlparser_init, /* tp_init */
3632 PyType_GenericAlloc, /* tp_alloc */
3633 xmlparser_new, /* tp_new */
3634 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003635};
3636
3637#endif
3638
3639/* ==================================================================== */
3640/* python module interface */
3641
3642static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003643 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003644 {NULL, NULL}
3645};
3646
Martin v. Löwis1a214512008-06-11 05:26:20 +00003647
3648static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003649 PyModuleDef_HEAD_INIT,
3650 "_elementtree",
3651 NULL,
3652 -1,
3653 _functions,
3654 NULL,
3655 NULL,
3656 NULL,
3657 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003658};
3659
Neal Norwitzf6657e62006-12-28 04:47:50 +00003660PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003661PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003662{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003663 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003664
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003665 /* Initialize object types */
3666 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003667 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003668 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003669 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003670#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003671 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003672 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003673#endif
3674
Martin v. Löwis1a214512008-06-11 05:26:20 +00003675 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003676 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003677 return NULL;
3678
Eli Bendersky828efde2012-04-05 05:40:58 +03003679 if (!(temp = PyImport_ImportModule("copy")))
3680 return NULL;
3681 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3682 Py_XDECREF(temp);
3683
3684 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3685 return NULL;
3686
Eli Bendersky20d41742012-06-01 09:48:37 +03003687 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003688 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3689 if (expat_capi) {
3690 /* check that it's usable */
3691 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3692 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3693 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3694 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003695 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003696 PyErr_SetString(PyExc_ImportError,
3697 "pyexpat version is incompatible");
3698 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003699 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003700 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003701 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003702 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003703
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003704 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003705 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003706 );
3707 Py_INCREF(elementtree_parseerror_obj);
3708 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3709
Eli Bendersky092af1f2012-03-04 07:14:03 +02003710 Py_INCREF((PyObject *)&Element_Type);
3711 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3712
Eli Bendersky58d548d2012-05-29 15:45:16 +03003713 Py_INCREF((PyObject *)&TreeBuilder_Type);
3714 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3715
Eli Bendersky52467b12012-06-01 07:13:08 +03003716#if defined(USE_EXPAT)
3717 Py_INCREF((PyObject *)&XMLParser_Type);
3718 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3719#endif
3720
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003721 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003722}