blob: 2129fc2d1eb4368cf0c2a3f3c570492dadfeb73b [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000053#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054
55/* -------------------------------------------------------------------- */
56/* configuration */
57
58/* Leave defined to include the expat-based XMLParser type */
59#define USE_EXPAT
60
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* An element can hold this many children without extra memory
62 allocations. */
63#define STATIC_CHILDREN 4
64
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
74
75/* -------------------------------------------------------------------- */
76
77#if 0
78static int memory = 0;
79#define ALLOC(size, comment)\
80do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
81#define RELEASE(size, comment)\
82do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
83#else
84#define ALLOC(size, comment)
85#define RELEASE(size, comment)
86#endif
87
88/* compiler tweaks */
89#if defined(_MSC_VER)
90#define LOCAL(type) static __inline type __fastcall
91#else
92#define LOCAL(type) static type
93#endif
94
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000095/* macros used to store 'join' flags in string object pointers. note
96 that all use of text and tail as object pointers must be wrapped in
97 JOIN_OBJ. see comments in the ElementObject definition for more
98 info. */
99#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
100#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Antoine Pitrouca8aa4a2012-09-20 20:56:47 +0200101#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000102
103/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000104static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000105static PyObject* elementtree_deepcopy_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000106static PyObject* elementpath_obj;
107
108/* helpers */
109
110LOCAL(PyObject*)
111deepcopy(PyObject* object, PyObject* memo)
112{
113 /* do a deep copy of the given object */
114
115 PyObject* args;
116 PyObject* result;
117
118 if (!elementtree_deepcopy_obj) {
119 PyErr_SetString(
120 PyExc_RuntimeError,
121 "deepcopy helper not found"
122 );
123 return NULL;
124 }
125
Antoine Pitrouc1948842012-10-01 23:40:37 +0200126 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000127 if (!args)
128 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000129 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000131 return result;
132}
133
134LOCAL(PyObject*)
135list_join(PyObject* list)
136{
137 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 PyObject* result;
140
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 if (!joiner)
143 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200144 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200146 if (result)
147 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 return result;
149}
150
Eli Bendersky48d358b2012-05-30 17:57:50 +0300151/* Is the given object an empty dictionary?
152*/
153static int
154is_empty_dict(PyObject *obj)
155{
156 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
157}
158
159
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200161/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162
163typedef struct {
164
165 /* attributes (a dictionary object), or None if no attributes */
166 PyObject* attrib;
167
168 /* child elements */
169 int length; /* actual number of items */
170 int allocated; /* allocated items */
171
172 /* this either points to _children or to a malloced buffer */
173 PyObject* *children;
174
175 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100176
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000177} ElementObjectExtra;
178
179typedef struct {
180 PyObject_HEAD
181
182 /* element tag (a string). */
183 PyObject* tag;
184
185 /* text before first child. note that this is a tagged pointer;
186 use JOIN_OBJ to get the object pointer. the join flag is used
187 to distinguish lists created by the tree builder from lists
188 assigned to the attribute by application code; the former
189 should be joined before being returned to the user, the latter
190 should be left intact. */
191 PyObject* text;
192
193 /* text after this element, in parent. note that this is a tagged
194 pointer; use JOIN_OBJ to get the object pointer. */
195 PyObject* tail;
196
197 ElementObjectExtra* extra;
198
Eli Benderskyebf37a22012-04-03 22:02:37 +0300199 PyObject *weakreflist; /* For tp_weaklistoffset */
200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000201} ElementObject;
202
Neal Norwitz227b5332006-03-22 09:28:35 +0000203static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000204
Christian Heimes90aa7642007-12-19 02:45:37 +0000205#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000206
207/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200208/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212{
213 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
214 if (!self->extra)
215 return -1;
216
217 if (!attrib)
218 attrib = Py_None;
219
220 Py_INCREF(attrib);
221 self->extra->attrib = attrib;
222
223 self->extra->length = 0;
224 self->extra->allocated = STATIC_CHILDREN;
225 self->extra->children = self->extra->_children;
226
227 return 0;
228}
229
230LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
Eli Bendersky08b85292012-04-04 15:55:07 +0300233 ElementObjectExtra *myextra;
234 int i;
235
Eli Benderskyebf37a22012-04-03 22:02:37 +0300236 if (!self->extra)
237 return;
238
239 /* Avoid DECREFs calling into this code again (cycles, etc.)
240 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300241 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300242 self->extra = NULL;
243
244 Py_DECREF(myextra->attrib);
245
Eli Benderskyebf37a22012-04-03 22:02:37 +0300246 for (i = 0; i < myextra->length; i++)
247 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000248
Eli Benderskyebf37a22012-04-03 22:02:37 +0300249 if (myextra->children != myextra->_children)
250 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251
Eli Benderskyebf37a22012-04-03 22:02:37 +0300252 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000253}
254
Eli Bendersky092af1f2012-03-04 07:14:03 +0200255/* Convenience internal function to create new Element objects with the given
256 * tag and attributes.
257*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200259create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000260{
261 ElementObject* self;
262
Eli Bendersky0192ba32012-03-30 16:38:33 +0300263 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 if (self == NULL)
265 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 self->extra = NULL;
267
Eli Bendersky48d358b2012-05-30 17:57:50 +0300268 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200269 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000270 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000272 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273 }
274
275 Py_INCREF(tag);
276 self->tag = tag;
277
278 Py_INCREF(Py_None);
279 self->text = Py_None;
280
281 Py_INCREF(Py_None);
282 self->tail = Py_None;
283
Eli Benderskyebf37a22012-04-03 22:02:37 +0300284 self->weakreflist = NULL;
285
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300287 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 return (PyObject*) self;
289}
290
Eli Bendersky092af1f2012-03-04 07:14:03 +0200291static PyObject *
292element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
293{
294 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
295 if (e != NULL) {
296 Py_INCREF(Py_None);
297 e->tag = Py_None;
298
299 Py_INCREF(Py_None);
300 e->text = Py_None;
301
302 Py_INCREF(Py_None);
303 e->tail = Py_None;
304
305 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300306 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307 }
308 return (PyObject *)e;
309}
310
Eli Bendersky737b1732012-05-29 06:02:56 +0300311/* Helper function for extracting the attrib dictionary from a keywords dict.
312 * This is required by some constructors/functions in this module that can
313 * either accept attrib as a keyword argument or all attributes splashed
314 * directly into *kwds.
315 * If there is no 'attrib' keyword, return an empty dict.
316 */
317static PyObject*
318get_attrib_from_keywords(PyObject *kwds)
319{
320 PyObject *attrib_str = PyUnicode_FromString("attrib");
321 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
322
323 if (attrib) {
324 /* If attrib was found in kwds, copy its value and remove it from
325 * kwds
326 */
327 if (!PyDict_Check(attrib)) {
328 Py_DECREF(attrib_str);
329 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
330 Py_TYPE(attrib)->tp_name);
331 return NULL;
332 }
333 attrib = PyDict_Copy(attrib);
334 PyDict_DelItem(kwds, attrib_str);
335 } else {
336 attrib = PyDict_New();
337 }
338
339 Py_DECREF(attrib_str);
340
341 if (attrib)
342 PyDict_Update(attrib, kwds);
343 return attrib;
344}
345
Eli Bendersky092af1f2012-03-04 07:14:03 +0200346static int
347element_init(PyObject *self, PyObject *args, PyObject *kwds)
348{
349 PyObject *tag;
350 PyObject *tmp;
351 PyObject *attrib = NULL;
352 ElementObject *self_elem;
353
354 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
355 return -1;
356
Eli Bendersky737b1732012-05-29 06:02:56 +0300357 if (attrib) {
358 /* attrib passed as positional arg */
359 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200360 if (!attrib)
361 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300362 if (kwds) {
363 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200364 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300365 return -1;
366 }
367 }
368 } else if (kwds) {
369 /* have keywords args */
370 attrib = get_attrib_from_keywords(kwds);
371 if (!attrib)
372 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200373 }
374
375 self_elem = (ElementObject *)self;
376
Antoine Pitrouc1948842012-10-01 23:40:37 +0200377 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200378 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200379 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200380 return -1;
381 }
382 }
383
Eli Bendersky48d358b2012-05-30 17:57:50 +0300384 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200385 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200386
387 /* Replace the objects already pointed to by tag, text and tail. */
388 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 Py_DECREF(tmp);
392
393 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200394 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200395 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200396 Py_DECREF(JOIN_OBJ(tmp));
397
398 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200400 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401 Py_DECREF(JOIN_OBJ(tmp));
402
403 return 0;
404}
405
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000406LOCAL(int)
407element_resize(ElementObject* self, int extra)
408{
409 int size;
410 PyObject* *children;
411
412 /* make sure self->children can hold the given number of extra
413 elements. set an exception and return -1 if allocation failed */
414
415 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417
418 size = self->extra->length + extra;
419
420 if (size > self->extra->allocated) {
421 /* use Python 2.4's list growth strategy */
422 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000423 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100424 * which needs at least 4 bytes.
425 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000426 * be safe.
427 */
428 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000429 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000430 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100431 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000432 * false alarm always assume at least one child to be safe.
433 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000434 children = PyObject_Realloc(self->extra->children,
435 size * sizeof(PyObject*));
436 if (!children)
437 goto nomemory;
438 } else {
439 children = PyObject_Malloc(size * sizeof(PyObject*));
440 if (!children)
441 goto nomemory;
442 /* copy existing children from static area to malloc buffer */
443 memcpy(children, self->extra->children,
444 self->extra->length * sizeof(PyObject*));
445 }
446 self->extra->children = children;
447 self->extra->allocated = size;
448 }
449
450 return 0;
451
452 nomemory:
453 PyErr_NoMemory();
454 return -1;
455}
456
457LOCAL(int)
458element_add_subelement(ElementObject* self, PyObject* element)
459{
460 /* add a child element to a parent */
461
462 if (element_resize(self, 1) < 0)
463 return -1;
464
465 Py_INCREF(element);
466 self->extra->children[self->extra->length] = element;
467
468 self->extra->length++;
469
470 return 0;
471}
472
473LOCAL(PyObject*)
474element_get_attrib(ElementObject* self)
475{
476 /* return borrowed reference to attrib dictionary */
477 /* note: this function assumes that the extra section exists */
478
479 PyObject* res = self->extra->attrib;
480
481 if (res == Py_None) {
482 /* create missing dictionary */
483 res = PyDict_New();
484 if (!res)
485 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200486 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000487 self->extra->attrib = res;
488 }
489
490 return res;
491}
492
493LOCAL(PyObject*)
494element_get_text(ElementObject* self)
495{
496 /* return borrowed reference to text attribute */
497
498 PyObject* res = self->text;
499
500 if (JOIN_GET(res)) {
501 res = JOIN_OBJ(res);
502 if (PyList_CheckExact(res)) {
503 res = list_join(res);
504 if (!res)
505 return NULL;
506 self->text = res;
507 }
508 }
509
510 return res;
511}
512
513LOCAL(PyObject*)
514element_get_tail(ElementObject* self)
515{
516 /* return borrowed reference to text attribute */
517
518 PyObject* res = self->tail;
519
520 if (JOIN_GET(res)) {
521 res = JOIN_OBJ(res);
522 if (PyList_CheckExact(res)) {
523 res = list_join(res);
524 if (!res)
525 return NULL;
526 self->tail = res;
527 }
528 }
529
530 return res;
531}
532
533static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300534subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000535{
536 PyObject* elem;
537
538 ElementObject* parent;
539 PyObject* tag;
540 PyObject* attrib = NULL;
541 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
542 &Element_Type, &parent, &tag,
543 &PyDict_Type, &attrib))
544 return NULL;
545
Eli Bendersky737b1732012-05-29 06:02:56 +0300546 if (attrib) {
547 /* attrib passed as positional arg */
548 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000549 if (!attrib)
550 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300551 if (kwds) {
552 if (PyDict_Update(attrib, kwds) < 0) {
553 return NULL;
554 }
555 }
556 } else if (kwds) {
557 /* have keyword args */
558 attrib = get_attrib_from_keywords(kwds);
559 if (!attrib)
560 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300562 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 Py_INCREF(Py_None);
564 attrib = Py_None;
565 }
566
Eli Bendersky092af1f2012-03-04 07:14:03 +0200567 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000568
569 Py_DECREF(attrib);
570
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000571 if (element_add_subelement(parent, elem) < 0) {
572 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000574 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000575
576 return elem;
577}
578
Eli Bendersky0192ba32012-03-30 16:38:33 +0300579static int
580element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
581{
582 Py_VISIT(self->tag);
583 Py_VISIT(JOIN_OBJ(self->text));
584 Py_VISIT(JOIN_OBJ(self->tail));
585
586 if (self->extra) {
587 int i;
588 Py_VISIT(self->extra->attrib);
589
590 for (i = 0; i < self->extra->length; ++i)
591 Py_VISIT(self->extra->children[i]);
592 }
593 return 0;
594}
595
596static int
597element_gc_clear(ElementObject *self)
598{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300599 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300600
601 /* The following is like Py_CLEAR for self->text and self->tail, but
602 * written explicitily because the real pointers hide behind access
603 * macros.
604 */
605 if (self->text) {
606 PyObject *tmp = JOIN_OBJ(self->text);
607 self->text = NULL;
608 Py_DECREF(tmp);
609 }
610
611 if (self->tail) {
612 PyObject *tmp = JOIN_OBJ(self->tail);
613 self->tail = NULL;
614 Py_DECREF(tmp);
615 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616
617 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300618 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300619 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300620 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300621 return 0;
622}
623
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624static void
625element_dealloc(ElementObject* self)
626{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300627 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300628
629 if (self->weakreflist != NULL)
630 PyObject_ClearWeakRefs((PyObject *) self);
631
Eli Bendersky0192ba32012-03-30 16:38:33 +0300632 /* element_gc_clear clears all references and deallocates extra
633 */
634 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000635
636 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200637 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000638}
639
640/* -------------------------------------------------------------------- */
641/* methods (in alphabetical order) */
642
643static PyObject*
644element_append(ElementObject* self, PyObject* args)
645{
646 PyObject* element;
647 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
648 return NULL;
649
650 if (element_add_subelement(self, element) < 0)
651 return NULL;
652
653 Py_RETURN_NONE;
654}
655
656static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300657element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000658{
659 if (!PyArg_ParseTuple(args, ":clear"))
660 return NULL;
661
Eli Benderskyebf37a22012-04-03 22:02:37 +0300662 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663
664 Py_INCREF(Py_None);
665 Py_DECREF(JOIN_OBJ(self->text));
666 self->text = Py_None;
667
668 Py_INCREF(Py_None);
669 Py_DECREF(JOIN_OBJ(self->tail));
670 self->tail = Py_None;
671
672 Py_RETURN_NONE;
673}
674
675static PyObject*
676element_copy(ElementObject* self, PyObject* args)
677{
678 int i;
679 ElementObject* element;
680
681 if (!PyArg_ParseTuple(args, ":__copy__"))
682 return NULL;
683
Eli Bendersky092af1f2012-03-04 07:14:03 +0200684 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000685 self->tag, (self->extra) ? self->extra->attrib : Py_None
686 );
687 if (!element)
688 return NULL;
689
690 Py_DECREF(JOIN_OBJ(element->text));
691 element->text = self->text;
692 Py_INCREF(JOIN_OBJ(element->text));
693
694 Py_DECREF(JOIN_OBJ(element->tail));
695 element->tail = self->tail;
696 Py_INCREF(JOIN_OBJ(element->tail));
697
698 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100699
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000700 if (element_resize(element, self->extra->length) < 0) {
701 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000702 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000703 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704
705 for (i = 0; i < self->extra->length; i++) {
706 Py_INCREF(self->extra->children[i]);
707 element->extra->children[i] = self->extra->children[i];
708 }
709
710 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100711
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000712 }
713
714 return (PyObject*) element;
715}
716
717static PyObject*
718element_deepcopy(ElementObject* self, PyObject* args)
719{
720 int i;
721 ElementObject* element;
722 PyObject* tag;
723 PyObject* attrib;
724 PyObject* text;
725 PyObject* tail;
726 PyObject* id;
727
728 PyObject* memo;
729 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
730 return NULL;
731
732 tag = deepcopy(self->tag, memo);
733 if (!tag)
734 return NULL;
735
736 if (self->extra) {
737 attrib = deepcopy(self->extra->attrib, memo);
738 if (!attrib) {
739 Py_DECREF(tag);
740 return NULL;
741 }
742 } else {
743 Py_INCREF(Py_None);
744 attrib = Py_None;
745 }
746
Eli Bendersky092af1f2012-03-04 07:14:03 +0200747 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
749 Py_DECREF(tag);
750 Py_DECREF(attrib);
751
752 if (!element)
753 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100754
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755 text = deepcopy(JOIN_OBJ(self->text), memo);
756 if (!text)
757 goto error;
758 Py_DECREF(element->text);
759 element->text = JOIN_SET(text, JOIN_GET(self->text));
760
761 tail = deepcopy(JOIN_OBJ(self->tail), memo);
762 if (!tail)
763 goto error;
764 Py_DECREF(element->tail);
765 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
766
767 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100768
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000769 if (element_resize(element, self->extra->length) < 0)
770 goto error;
771
772 for (i = 0; i < self->extra->length; i++) {
773 PyObject* child = deepcopy(self->extra->children[i], memo);
774 if (!child) {
775 element->extra->length = i;
776 goto error;
777 }
778 element->extra->children[i] = child;
779 }
780
781 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100782
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000783 }
784
785 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200786 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000787 if (!id)
788 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789
790 i = PyDict_SetItem(memo, id, (PyObject*) element);
791
792 Py_DECREF(id);
793
794 if (i < 0)
795 goto error;
796
797 return (PyObject*) element;
798
799 error:
800 Py_DECREF(element);
801 return NULL;
802}
803
Martin v. Löwisbce16662012-06-17 10:41:22 +0200804static PyObject*
805element_sizeof(PyObject* _self, PyObject* args)
806{
807 ElementObject *self = (ElementObject*)_self;
808 Py_ssize_t result = sizeof(ElementObject);
809 if (self->extra) {
810 result += sizeof(ElementObjectExtra);
811 if (self->extra->children != self->extra->_children)
812 result += sizeof(PyObject*) * self->extra->allocated;
813 }
814 return PyLong_FromSsize_t(result);
815}
816
Eli Bendersky698bdb22013-01-10 06:01:06 -0800817/* dict keys for getstate/setstate. */
818#define PICKLED_TAG "tag"
819#define PICKLED_CHILDREN "_children"
820#define PICKLED_ATTRIB "attrib"
821#define PICKLED_TAIL "tail"
822#define PICKLED_TEXT "text"
823
824/* __getstate__ returns a fabricated instance dict as in the pure-Python
825 * Element implementation, for interoperability/interchangeability. This
826 * makes the pure-Python implementation details an API, but (a) there aren't
827 * any unnecessary structures there; and (b) it buys compatibility with 3.2
828 * pickles. See issue #16076.
829 */
830static PyObject *
831element_getstate(ElementObject *self)
832{
833 int i, noattrib;
834 PyObject *instancedict = NULL, *children;
835
836 /* Build a list of children. */
837 children = PyList_New(self->extra ? self->extra->length : 0);
838 if (!children)
839 return NULL;
840 for (i = 0; i < PyList_GET_SIZE(children); i++) {
841 PyObject *child = self->extra->children[i];
842 Py_INCREF(child);
843 PyList_SET_ITEM(children, i, child);
844 }
845
846 /* Construct the state object. */
847 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
848 if (noattrib)
849 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
850 PICKLED_TAG, self->tag,
851 PICKLED_CHILDREN, children,
852 PICKLED_ATTRIB,
853 PICKLED_TEXT, self->text,
854 PICKLED_TAIL, self->tail);
855 else
856 instancedict = Py_BuildValue("{sOsOsOsOsO}",
857 PICKLED_TAG, self->tag,
858 PICKLED_CHILDREN, children,
859 PICKLED_ATTRIB, self->extra->attrib,
860 PICKLED_TEXT, self->text,
861 PICKLED_TAIL, self->tail);
862 if (instancedict)
863 return instancedict;
864 else {
865 for (i = 0; i < PyList_GET_SIZE(children); i++)
866 Py_DECREF(PyList_GET_ITEM(children, i));
867 Py_DECREF(children);
868
869 return NULL;
870 }
871}
872
873static PyObject *
874element_setstate_from_attributes(ElementObject *self,
875 PyObject *tag,
876 PyObject *attrib,
877 PyObject *text,
878 PyObject *tail,
879 PyObject *children)
880{
881 Py_ssize_t i, nchildren;
882
883 if (!tag) {
884 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
885 return NULL;
886 }
887 if (!text) {
888 Py_INCREF(Py_None);
889 text = Py_None;
890 }
891 if (!tail) {
892 Py_INCREF(Py_None);
893 tail = Py_None;
894 }
895
896 Py_CLEAR(self->tag);
897 self->tag = tag;
898 Py_INCREF(self->tag);
899
900 Py_CLEAR(self->text);
901 self->text = text;
902 Py_INCREF(self->text);
903
904 Py_CLEAR(self->tail);
905 self->tail = tail;
906 Py_INCREF(self->tail);
907
908 /* Handle ATTRIB and CHILDREN. */
909 if (!children && !attrib)
910 Py_RETURN_NONE;
911
912 /* Compute 'nchildren'. */
913 if (children) {
914 if (!PyList_Check(children)) {
915 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
916 return NULL;
917 }
918 nchildren = PyList_Size(children);
919 }
920 else {
921 nchildren = 0;
922 }
923
924 /* Allocate 'extra'. */
925 if (element_resize(self, nchildren)) {
926 return NULL;
927 }
928 assert(self->extra && self->extra->allocated >= nchildren);
929
930 /* Copy children */
931 for (i = 0; i < nchildren; i++) {
932 self->extra->children[i] = PyList_GET_ITEM(children, i);
933 Py_INCREF(self->extra->children[i]);
934 }
935
936 self->extra->length = nchildren;
937 self->extra->allocated = nchildren;
938
939 /* Stash attrib. */
940 if (attrib) {
941 Py_CLEAR(self->extra->attrib);
942 self->extra->attrib = attrib;
943 Py_INCREF(attrib);
944 }
945
946 Py_RETURN_NONE;
947}
948
949/* __setstate__ for Element instance from the Python implementation.
950 * 'state' should be the instance dict.
951 */
952static PyObject *
953element_setstate_from_Python(ElementObject *self, PyObject *state)
954{
955 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
956 PICKLED_TAIL, PICKLED_CHILDREN, 0};
957 PyObject *args;
958 PyObject *tag, *attrib, *text, *tail, *children;
959 int error;
960
961 /* More instance dict members than we know to handle? */
962 tag = attrib = text = tail = children = NULL;
963 args = PyTuple_New(0);
964 error = ! PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
965 &attrib, &text, &tail, &children);
966 Py_DECREF(args);
967 if (error)
968 return NULL;
969 else
970 return element_setstate_from_attributes(self, tag, attrib, text,
971 tail, children);
972}
973
974static PyObject *
975element_setstate(ElementObject *self, PyObject *state)
976{
977 if (!PyDict_CheckExact(state)) {
978 PyErr_Format(PyExc_TypeError,
979 "Don't know how to unpickle \"%.200R\" as an Element",
980 state);
981 return NULL;
982 }
983 else
984 return element_setstate_from_Python(self, state);
985}
986
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000987LOCAL(int)
988checkpath(PyObject* tag)
989{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000990 Py_ssize_t i;
991 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000992
993 /* check if a tag contains an xpath character */
994
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000995#define PATHCHAR(ch) \
996 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000997
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000998 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200999 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1000 void *data = PyUnicode_DATA(tag);
1001 unsigned int kind = PyUnicode_KIND(tag);
1002 for (i = 0; i < len; i++) {
1003 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1004 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001005 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001006 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001007 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001008 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001009 return 1;
1010 }
1011 return 0;
1012 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001013 if (PyBytes_Check(tag)) {
1014 char *p = PyBytes_AS_STRING(tag);
1015 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001016 if (p[i] == '{')
1017 check = 0;
1018 else if (p[i] == '}')
1019 check = 1;
1020 else if (check && PATHCHAR(p[i]))
1021 return 1;
1022 }
1023 return 0;
1024 }
1025
1026 return 1; /* unknown type; might be path expression */
1027}
1028
1029static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001030element_extend(ElementObject* self, PyObject* args)
1031{
1032 PyObject* seq;
1033 Py_ssize_t i, seqlen = 0;
1034
1035 PyObject* seq_in;
1036 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1037 return NULL;
1038
1039 seq = PySequence_Fast(seq_in, "");
1040 if (!seq) {
1041 PyErr_Format(
1042 PyExc_TypeError,
1043 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1044 );
1045 return NULL;
1046 }
1047
1048 seqlen = PySequence_Size(seq);
1049 for (i = 0; i < seqlen; i++) {
1050 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001051 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1052 Py_DECREF(seq);
1053 PyErr_Format(
1054 PyExc_TypeError,
1055 "expected an Element, not \"%.200s\"",
1056 Py_TYPE(element)->tp_name);
1057 return NULL;
1058 }
1059
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001060 if (element_add_subelement(self, element) < 0) {
1061 Py_DECREF(seq);
1062 return NULL;
1063 }
1064 }
1065
1066 Py_DECREF(seq);
1067
1068 Py_RETURN_NONE;
1069}
1070
1071static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001072element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001073{
1074 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001075 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001076 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001077 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001078
Eli Bendersky737b1732012-05-29 06:02:56 +03001079 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1080 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081 return NULL;
1082
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001083 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001084 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001085 return _PyObject_CallMethodId(
1086 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001087 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001088 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001089
1090 if (!self->extra)
1091 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001092
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001093 for (i = 0; i < self->extra->length; i++) {
1094 PyObject* item = self->extra->children[i];
1095 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001096 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001097 Py_INCREF(item);
1098 return item;
1099 }
1100 }
1101
1102 Py_RETURN_NONE;
1103}
1104
1105static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001106element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001107{
1108 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001109 PyObject* tag;
1110 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001111 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001112 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001113 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001114
Eli Bendersky737b1732012-05-29 06:02:56 +03001115 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1116 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001117 return NULL;
1118
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001119 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001120 return _PyObject_CallMethodId(
1121 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001122 );
1123
1124 if (!self->extra) {
1125 Py_INCREF(default_value);
1126 return default_value;
1127 }
1128
1129 for (i = 0; i < self->extra->length; i++) {
1130 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +00001131 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
1132
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001133 PyObject* text = element_get_text(item);
1134 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +00001135 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001136 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001137 return text;
1138 }
1139 }
1140
1141 Py_INCREF(default_value);
1142 return default_value;
1143}
1144
1145static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001146element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147{
1148 int i;
1149 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001150 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001151 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001152 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001153
Eli Bendersky737b1732012-05-29 06:02:56 +03001154 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1155 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001156 return NULL;
1157
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001158 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001159 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001160 return _PyObject_CallMethodId(
1161 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001162 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001163 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001164
1165 out = PyList_New(0);
1166 if (!out)
1167 return NULL;
1168
1169 if (!self->extra)
1170 return out;
1171
1172 for (i = 0; i < self->extra->length; i++) {
1173 PyObject* item = self->extra->children[i];
1174 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001175 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001176 if (PyList_Append(out, item) < 0) {
1177 Py_DECREF(out);
1178 return NULL;
1179 }
1180 }
1181 }
1182
1183 return out;
1184}
1185
1186static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001187element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001188{
1189 PyObject* tag;
1190 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001191 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001192 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001193
Eli Bendersky737b1732012-05-29 06:02:56 +03001194 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1195 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001196 return NULL;
1197
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001198 return _PyObject_CallMethodId(
1199 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001200 );
1201}
1202
1203static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001204element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001205{
1206 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001207 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001208
1209 PyObject* key;
1210 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001211
1212 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1213 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001214 return NULL;
1215
1216 if (!self->extra || self->extra->attrib == Py_None)
1217 value = default_value;
1218 else {
1219 value = PyDict_GetItem(self->extra->attrib, key);
1220 if (!value)
1221 value = default_value;
1222 }
1223
1224 Py_INCREF(value);
1225 return value;
1226}
1227
1228static PyObject*
1229element_getchildren(ElementObject* self, PyObject* args)
1230{
1231 int i;
1232 PyObject* list;
1233
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001234 /* FIXME: report as deprecated? */
1235
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001236 if (!PyArg_ParseTuple(args, ":getchildren"))
1237 return NULL;
1238
1239 if (!self->extra)
1240 return PyList_New(0);
1241
1242 list = PyList_New(self->extra->length);
1243 if (!list)
1244 return NULL;
1245
1246 for (i = 0; i < self->extra->length; i++) {
1247 PyObject* item = self->extra->children[i];
1248 Py_INCREF(item);
1249 PyList_SET_ITEM(list, i, item);
1250 }
1251
1252 return list;
1253}
1254
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001255
Eli Bendersky64d11e62012-06-15 07:42:50 +03001256static PyObject *
1257create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1258
1259
1260static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001261element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001262{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001263 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001264 static char* kwlist[] = {"tag", 0};
1265
1266 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001267 return NULL;
1268
Eli Bendersky64d11e62012-06-15 07:42:50 +03001269 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001270}
1271
1272
1273static PyObject*
1274element_itertext(ElementObject* self, PyObject* args)
1275{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001276 if (!PyArg_ParseTuple(args, ":itertext"))
1277 return NULL;
1278
Eli Bendersky64d11e62012-06-15 07:42:50 +03001279 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001280}
1281
Eli Bendersky64d11e62012-06-15 07:42:50 +03001282
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001283static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001284element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001285{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001286 ElementObject* self = (ElementObject*) self_;
1287
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001288 if (!self->extra || index < 0 || index >= self->extra->length) {
1289 PyErr_SetString(
1290 PyExc_IndexError,
1291 "child index out of range"
1292 );
1293 return NULL;
1294 }
1295
1296 Py_INCREF(self->extra->children[index]);
1297 return self->extra->children[index];
1298}
1299
1300static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001301element_insert(ElementObject* self, PyObject* args)
1302{
1303 int i;
1304
1305 int index;
1306 PyObject* element;
1307 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1308 &Element_Type, &element))
1309 return NULL;
1310
1311 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001312 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001313
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001314 if (index < 0) {
1315 index += self->extra->length;
1316 if (index < 0)
1317 index = 0;
1318 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001319 if (index > self->extra->length)
1320 index = self->extra->length;
1321
1322 if (element_resize(self, 1) < 0)
1323 return NULL;
1324
1325 for (i = self->extra->length; i > index; i--)
1326 self->extra->children[i] = self->extra->children[i-1];
1327
1328 Py_INCREF(element);
1329 self->extra->children[index] = element;
1330
1331 self->extra->length++;
1332
1333 Py_RETURN_NONE;
1334}
1335
1336static PyObject*
1337element_items(ElementObject* self, PyObject* args)
1338{
1339 if (!PyArg_ParseTuple(args, ":items"))
1340 return NULL;
1341
1342 if (!self->extra || self->extra->attrib == Py_None)
1343 return PyList_New(0);
1344
1345 return PyDict_Items(self->extra->attrib);
1346}
1347
1348static PyObject*
1349element_keys(ElementObject* self, PyObject* args)
1350{
1351 if (!PyArg_ParseTuple(args, ":keys"))
1352 return NULL;
1353
1354 if (!self->extra || self->extra->attrib == Py_None)
1355 return PyList_New(0);
1356
1357 return PyDict_Keys(self->extra->attrib);
1358}
1359
Martin v. Löwis18e16552006-02-15 17:27:45 +00001360static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001361element_length(ElementObject* self)
1362{
1363 if (!self->extra)
1364 return 0;
1365
1366 return self->extra->length;
1367}
1368
1369static PyObject*
1370element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1371{
1372 PyObject* elem;
1373
1374 PyObject* tag;
1375 PyObject* attrib;
1376 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1377 return NULL;
1378
1379 attrib = PyDict_Copy(attrib);
1380 if (!attrib)
1381 return NULL;
1382
Eli Bendersky092af1f2012-03-04 07:14:03 +02001383 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001384
1385 Py_DECREF(attrib);
1386
1387 return elem;
1388}
1389
1390static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001391element_remove(ElementObject* self, PyObject* args)
1392{
1393 int i;
1394
1395 PyObject* element;
1396 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1397 return NULL;
1398
1399 if (!self->extra) {
1400 /* element has no children, so raise exception */
1401 PyErr_SetString(
1402 PyExc_ValueError,
1403 "list.remove(x): x not in list"
1404 );
1405 return NULL;
1406 }
1407
1408 for (i = 0; i < self->extra->length; i++) {
1409 if (self->extra->children[i] == element)
1410 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001411 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001412 break;
1413 }
1414
1415 if (i == self->extra->length) {
1416 /* element is not in children, so raise exception */
1417 PyErr_SetString(
1418 PyExc_ValueError,
1419 "list.remove(x): x not in list"
1420 );
1421 return NULL;
1422 }
1423
1424 Py_DECREF(self->extra->children[i]);
1425
1426 self->extra->length--;
1427
1428 for (; i < self->extra->length; i++)
1429 self->extra->children[i] = self->extra->children[i+1];
1430
1431 Py_RETURN_NONE;
1432}
1433
1434static PyObject*
1435element_repr(ElementObject* self)
1436{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001437 if (self->tag)
1438 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1439 else
1440 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001441}
1442
1443static PyObject*
1444element_set(ElementObject* self, PyObject* args)
1445{
1446 PyObject* attrib;
1447
1448 PyObject* key;
1449 PyObject* value;
1450 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1451 return NULL;
1452
1453 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001454 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001455
1456 attrib = element_get_attrib(self);
1457 if (!attrib)
1458 return NULL;
1459
1460 if (PyDict_SetItem(attrib, key, value) < 0)
1461 return NULL;
1462
1463 Py_RETURN_NONE;
1464}
1465
1466static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001467element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001468{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001469 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001470 int i;
1471 PyObject* old;
1472
1473 if (!self->extra || index < 0 || index >= self->extra->length) {
1474 PyErr_SetString(
1475 PyExc_IndexError,
1476 "child assignment index out of range");
1477 return -1;
1478 }
1479
1480 old = self->extra->children[index];
1481
1482 if (item) {
1483 Py_INCREF(item);
1484 self->extra->children[index] = item;
1485 } else {
1486 self->extra->length--;
1487 for (i = index; i < self->extra->length; i++)
1488 self->extra->children[i] = self->extra->children[i+1];
1489 }
1490
1491 Py_DECREF(old);
1492
1493 return 0;
1494}
1495
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001496static PyObject*
1497element_subscr(PyObject* self_, PyObject* item)
1498{
1499 ElementObject* self = (ElementObject*) self_;
1500
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001501 if (PyIndex_Check(item)) {
1502 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001503
1504 if (i == -1 && PyErr_Occurred()) {
1505 return NULL;
1506 }
1507 if (i < 0 && self->extra)
1508 i += self->extra->length;
1509 return element_getitem(self_, i);
1510 }
1511 else if (PySlice_Check(item)) {
1512 Py_ssize_t start, stop, step, slicelen, cur, i;
1513 PyObject* list;
1514
1515 if (!self->extra)
1516 return PyList_New(0);
1517
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001518 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001519 self->extra->length,
1520 &start, &stop, &step, &slicelen) < 0) {
1521 return NULL;
1522 }
1523
1524 if (slicelen <= 0)
1525 return PyList_New(0);
1526 else {
1527 list = PyList_New(slicelen);
1528 if (!list)
1529 return NULL;
1530
1531 for (cur = start, i = 0; i < slicelen;
1532 cur += step, i++) {
1533 PyObject* item = self->extra->children[cur];
1534 Py_INCREF(item);
1535 PyList_SET_ITEM(list, i, item);
1536 }
1537
1538 return list;
1539 }
1540 }
1541 else {
1542 PyErr_SetString(PyExc_TypeError,
1543 "element indices must be integers");
1544 return NULL;
1545 }
1546}
1547
1548static int
1549element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1550{
1551 ElementObject* self = (ElementObject*) self_;
1552
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001553 if (PyIndex_Check(item)) {
1554 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001555
1556 if (i == -1 && PyErr_Occurred()) {
1557 return -1;
1558 }
1559 if (i < 0 && self->extra)
1560 i += self->extra->length;
1561 return element_setitem(self_, i, value);
1562 }
1563 else if (PySlice_Check(item)) {
1564 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1565
1566 PyObject* recycle = NULL;
1567 PyObject* seq = NULL;
1568
1569 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001570 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001571
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001572 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001573 self->extra->length,
1574 &start, &stop, &step, &slicelen) < 0) {
1575 return -1;
1576 }
1577
Eli Bendersky865756a2012-03-09 13:38:15 +02001578 if (value == NULL) {
1579 /* Delete slice */
1580 size_t cur;
1581 Py_ssize_t i;
1582
1583 if (slicelen <= 0)
1584 return 0;
1585
1586 /* Since we're deleting, the direction of the range doesn't matter,
1587 * so for simplicity make it always ascending.
1588 */
1589 if (step < 0) {
1590 stop = start + 1;
1591 start = stop + step * (slicelen - 1) - 1;
1592 step = -step;
1593 }
1594
1595 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1596
1597 /* recycle is a list that will contain all the children
1598 * scheduled for removal.
1599 */
1600 if (!(recycle = PyList_New(slicelen))) {
1601 PyErr_NoMemory();
1602 return -1;
1603 }
1604
1605 /* This loop walks over all the children that have to be deleted,
1606 * with cur pointing at them. num_moved is the amount of children
1607 * until the next deleted child that have to be "shifted down" to
1608 * occupy the deleted's places.
1609 * Note that in the ith iteration, shifting is done i+i places down
1610 * because i children were already removed.
1611 */
1612 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1613 /* Compute how many children have to be moved, clipping at the
1614 * list end.
1615 */
1616 Py_ssize_t num_moved = step - 1;
1617 if (cur + step >= (size_t)self->extra->length) {
1618 num_moved = self->extra->length - cur - 1;
1619 }
1620
1621 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1622
1623 memmove(
1624 self->extra->children + cur - i,
1625 self->extra->children + cur + 1,
1626 num_moved * sizeof(PyObject *));
1627 }
1628
1629 /* Leftover "tail" after the last removed child */
1630 cur = start + (size_t)slicelen * step;
1631 if (cur < (size_t)self->extra->length) {
1632 memmove(
1633 self->extra->children + cur - slicelen,
1634 self->extra->children + cur,
1635 (self->extra->length - cur) * sizeof(PyObject *));
1636 }
1637
1638 self->extra->length -= slicelen;
1639
1640 /* Discard the recycle list with all the deleted sub-elements */
1641 Py_XDECREF(recycle);
1642 return 0;
1643 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001644 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001645 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001646 seq = PySequence_Fast(value, "");
1647 if (!seq) {
1648 PyErr_Format(
1649 PyExc_TypeError,
1650 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1651 );
1652 return -1;
1653 }
1654 newlen = PySequence_Size(seq);
1655 }
1656
1657 if (step != 1 && newlen != slicelen)
1658 {
1659 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001660 "attempt to assign sequence of size %zd "
1661 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001662 newlen, slicelen
1663 );
1664 return -1;
1665 }
1666
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001667 /* Resize before creating the recycle bin, to prevent refleaks. */
1668 if (newlen > slicelen) {
1669 if (element_resize(self, newlen - slicelen) < 0) {
1670 if (seq) {
1671 Py_DECREF(seq);
1672 }
1673 return -1;
1674 }
1675 }
1676
1677 if (slicelen > 0) {
1678 /* to avoid recursive calls to this method (via decref), move
1679 old items to the recycle bin here, and get rid of them when
1680 we're done modifying the element */
1681 recycle = PyList_New(slicelen);
1682 if (!recycle) {
1683 if (seq) {
1684 Py_DECREF(seq);
1685 }
1686 return -1;
1687 }
1688 for (cur = start, i = 0; i < slicelen;
1689 cur += step, i++)
1690 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1691 }
1692
1693 if (newlen < slicelen) {
1694 /* delete slice */
1695 for (i = stop; i < self->extra->length; i++)
1696 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1697 } else if (newlen > slicelen) {
1698 /* insert slice */
1699 for (i = self->extra->length-1; i >= stop; i--)
1700 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1701 }
1702
1703 /* replace the slice */
1704 for (cur = start, i = 0; i < newlen;
1705 cur += step, i++) {
1706 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1707 Py_INCREF(element);
1708 self->extra->children[cur] = element;
1709 }
1710
1711 self->extra->length += newlen - slicelen;
1712
1713 if (seq) {
1714 Py_DECREF(seq);
1715 }
1716
1717 /* discard the recycle bin, and everything in it */
1718 Py_XDECREF(recycle);
1719
1720 return 0;
1721 }
1722 else {
1723 PyErr_SetString(PyExc_TypeError,
1724 "element indices must be integers");
1725 return -1;
1726 }
1727}
1728
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001729static PyMethodDef element_methods[] = {
1730
Eli Bendersky0192ba32012-03-30 16:38:33 +03001731 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001732
Eli Benderskya8736902013-01-05 06:26:39 -08001733 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001734 {"set", (PyCFunction) element_set, METH_VARARGS},
1735
Eli Bendersky737b1732012-05-29 06:02:56 +03001736 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1737 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1738 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001739
1740 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001741 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001742 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1743 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1744
Eli Benderskya8736902013-01-05 06:26:39 -08001745 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001746 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001747 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001748
Eli Benderskya8736902013-01-05 06:26:39 -08001749 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001750 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1751
1752 {"items", (PyCFunction) element_items, METH_VARARGS},
1753 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1754
1755 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1756
1757 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1758 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001759 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001760 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1761 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001762
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001763 {NULL, NULL}
1764};
1765
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001766static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001767element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001768{
1769 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001770 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001771
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001772 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001773 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001774
Alexander Belopolskye239d232010-12-08 23:31:48 +00001775 if (name == NULL)
1776 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001777
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001778 /* handle common attributes first */
1779 if (strcmp(name, "tag") == 0) {
1780 res = self->tag;
1781 Py_INCREF(res);
1782 return res;
1783 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001784 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001785 Py_INCREF(res);
1786 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001787 }
1788
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001789 /* methods */
1790 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1791 if (res)
1792 return res;
1793
1794 /* less common attributes */
1795 if (strcmp(name, "tail") == 0) {
1796 PyErr_Clear();
1797 res = element_get_tail(self);
1798 } else if (strcmp(name, "attrib") == 0) {
1799 PyErr_Clear();
1800 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001801 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001802 res = element_get_attrib(self);
1803 }
1804
1805 if (!res)
1806 return NULL;
1807
1808 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001809 return res;
1810}
1811
Eli Benderskyb20df952012-05-20 06:33:29 +03001812static PyObject*
1813element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001814{
Eli Benderskyb20df952012-05-20 06:33:29 +03001815 char *name = "";
1816 if (PyUnicode_Check(nameobj))
1817 name = _PyUnicode_AsString(nameobj);
1818
1819 if (name == NULL)
1820 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001821
1822 if (strcmp(name, "tag") == 0) {
1823 Py_DECREF(self->tag);
1824 self->tag = value;
1825 Py_INCREF(self->tag);
1826 } else if (strcmp(name, "text") == 0) {
1827 Py_DECREF(JOIN_OBJ(self->text));
1828 self->text = value;
1829 Py_INCREF(self->text);
1830 } else if (strcmp(name, "tail") == 0) {
1831 Py_DECREF(JOIN_OBJ(self->tail));
1832 self->tail = value;
1833 Py_INCREF(self->tail);
1834 } else if (strcmp(name, "attrib") == 0) {
1835 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001836 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001837 Py_DECREF(self->extra->attrib);
1838 self->extra->attrib = value;
1839 Py_INCREF(self->extra->attrib);
1840 } else {
1841 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001842 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001843 }
1844
Eli Benderskyb20df952012-05-20 06:33:29 +03001845 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001846}
1847
1848static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001849 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001850 0, /* sq_concat */
1851 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001852 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001853 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001854 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001855 0,
1856};
1857
1858static PyMappingMethods element_as_mapping = {
1859 (lenfunc) element_length,
1860 (binaryfunc) element_subscr,
1861 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001862};
1863
Neal Norwitz227b5332006-03-22 09:28:35 +00001864static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001865 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001866 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001867 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001868 (destructor)element_dealloc, /* tp_dealloc */
1869 0, /* tp_print */
1870 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001871 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001872 0, /* tp_reserved */
1873 (reprfunc)element_repr, /* tp_repr */
1874 0, /* tp_as_number */
1875 &element_as_sequence, /* tp_as_sequence */
1876 &element_as_mapping, /* tp_as_mapping */
1877 0, /* tp_hash */
1878 0, /* tp_call */
1879 0, /* tp_str */
1880 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001881 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001882 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001883 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1884 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001885 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001886 (traverseproc)element_gc_traverse, /* tp_traverse */
1887 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001888 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001889 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001890 0, /* tp_iter */
1891 0, /* tp_iternext */
1892 element_methods, /* tp_methods */
1893 0, /* tp_members */
1894 0, /* tp_getset */
1895 0, /* tp_base */
1896 0, /* tp_dict */
1897 0, /* tp_descr_get */
1898 0, /* tp_descr_set */
1899 0, /* tp_dictoffset */
1900 (initproc)element_init, /* tp_init */
1901 PyType_GenericAlloc, /* tp_alloc */
1902 element_new, /* tp_new */
1903 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001904};
1905
Eli Bendersky64d11e62012-06-15 07:42:50 +03001906/******************************* Element iterator ****************************/
1907
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed as a singly linked list starting at parent_stack.
 * Each stack node contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
1916typedef struct ParentLocator_t {
1917 ElementObject *parent;
1918 Py_ssize_t child_index;
1919 struct ParentLocator_t *next;
1920} ParentLocator;
1921
1922typedef struct {
1923 PyObject_HEAD
1924 ParentLocator *parent_stack;
1925 ElementObject *root_element;
1926 PyObject *sought_tag;
1927 int root_done;
1928 int gettext;
1929} ElementIterObject;
1930
1931
1932static void
1933elementiter_dealloc(ElementIterObject *it)
1934{
1935 ParentLocator *p = it->parent_stack;
1936 while (p) {
1937 ParentLocator *temp = p;
1938 Py_XDECREF(p->parent);
1939 p = p->next;
1940 PyObject_Free(temp);
1941 }
1942
1943 Py_XDECREF(it->sought_tag);
1944 Py_XDECREF(it->root_element);
1945
1946 PyObject_GC_UnTrack(it);
1947 PyObject_GC_Del(it);
1948}
1949
1950static int
1951elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1952{
1953 ParentLocator *p = it->parent_stack;
1954 while (p) {
1955 Py_VISIT(p->parent);
1956 p = p->next;
1957 }
1958
1959 Py_VISIT(it->root_element);
1960 Py_VISIT(it->sought_tag);
1961 return 0;
1962}
1963
1964/* Helper function for elementiter_next. Add a new parent to the parent stack.
1965 */
1966static ParentLocator *
1967parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1968{
1969 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1970 if (new_node) {
1971 new_node->parent = parent;
1972 Py_INCREF(parent);
1973 new_node->child_index = 0;
1974 new_node->next = stack;
1975 }
1976 return new_node;
1977}
1978
1979static PyObject *
1980elementiter_next(ElementIterObject *it)
1981{
1982 /* Sub-element iterator.
1983 *
1984 * A short note on gettext: this function serves both the iter() and
1985 * itertext() methods to avoid code duplication. However, there are a few
1986 * small differences in the way these iterations work. Namely:
1987 * - itertext() only yields text from nodes that have it, and continues
1988 * iterating when a node doesn't have text (so it doesn't return any
1989 * node like iter())
1990 * - itertext() also has to handle tail, after finishing with all the
1991 * children of a node.
1992 */
Eli Bendersky113da642012-06-15 07:52:49 +03001993 ElementObject *cur_parent;
1994 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001995
1996 while (1) {
1997 /* Handle the case reached in the beginning and end of iteration, where
1998 * the parent stack is empty. The root_done flag gives us indication
1999 * whether we've just started iterating (so root_done is 0), in which
2000 * case the root is returned. If root_done is 1 and we're here, the
2001 * iterator is exhausted.
2002 */
2003 if (!it->parent_stack->parent) {
2004 if (it->root_done) {
2005 PyErr_SetNone(PyExc_StopIteration);
2006 return NULL;
2007 } else {
2008 it->parent_stack = parent_stack_push_new(it->parent_stack,
2009 it->root_element);
2010 if (!it->parent_stack) {
2011 PyErr_NoMemory();
2012 return NULL;
2013 }
2014
2015 it->root_done = 1;
2016 if (it->sought_tag == Py_None ||
2017 PyObject_RichCompareBool(it->root_element->tag,
2018 it->sought_tag, Py_EQ) == 1) {
2019 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002020 PyObject *text = element_get_text(it->root_element);
2021 if (!text)
2022 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002023 if (PyObject_IsTrue(text)) {
2024 Py_INCREF(text);
2025 return text;
2026 }
2027 } else {
2028 Py_INCREF(it->root_element);
2029 return (PyObject *)it->root_element;
2030 }
2031 }
2032 }
2033 }
2034
2035 /* See if there are children left to traverse in the current parent. If
2036 * yes, visit the next child. If not, pop the stack and try again.
2037 */
Eli Bendersky113da642012-06-15 07:52:49 +03002038 cur_parent = it->parent_stack->parent;
2039 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002040 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2041 ElementObject *child = (ElementObject *)
2042 cur_parent->extra->children[child_index];
2043 it->parent_stack->child_index++;
2044 it->parent_stack = parent_stack_push_new(it->parent_stack,
2045 child);
2046 if (!it->parent_stack) {
2047 PyErr_NoMemory();
2048 return NULL;
2049 }
2050
2051 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002052 PyObject *text = element_get_text(child);
2053 if (!text)
2054 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002055 if (PyObject_IsTrue(text)) {
2056 Py_INCREF(text);
2057 return text;
2058 }
2059 } else if (it->sought_tag == Py_None ||
2060 PyObject_RichCompareBool(child->tag,
2061 it->sought_tag, Py_EQ) == 1) {
2062 Py_INCREF(child);
2063 return (PyObject *)child;
2064 }
2065 else
2066 continue;
2067 }
2068 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002069 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002070 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002071 if (it->gettext) {
2072 tail = element_get_tail(cur_parent);
2073 if (!tail)
2074 return NULL;
2075 }
2076 else
2077 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002078 Py_XDECREF(it->parent_stack->parent);
2079 PyObject_Free(it->parent_stack);
2080 it->parent_stack = next;
2081
2082 /* Note that extra condition on it->parent_stack->parent here;
2083 * this is because itertext() is supposed to only return *inner*
2084 * text, not text following the element it began iteration with.
2085 */
2086 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2087 Py_INCREF(tail);
2088 return tail;
2089 }
2090 }
2091 }
2092
2093 return NULL;
2094}
2095
2096
2097static PyTypeObject ElementIter_Type = {
2098 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002099 /* Using the module's name since the pure-Python implementation does not
2100 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002101 "_elementtree._element_iterator", /* tp_name */
2102 sizeof(ElementIterObject), /* tp_basicsize */
2103 0, /* tp_itemsize */
2104 /* methods */
2105 (destructor)elementiter_dealloc, /* tp_dealloc */
2106 0, /* tp_print */
2107 0, /* tp_getattr */
2108 0, /* tp_setattr */
2109 0, /* tp_reserved */
2110 0, /* tp_repr */
2111 0, /* tp_as_number */
2112 0, /* tp_as_sequence */
2113 0, /* tp_as_mapping */
2114 0, /* tp_hash */
2115 0, /* tp_call */
2116 0, /* tp_str */
2117 0, /* tp_getattro */
2118 0, /* tp_setattro */
2119 0, /* tp_as_buffer */
2120 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2121 0, /* tp_doc */
2122 (traverseproc)elementiter_traverse, /* tp_traverse */
2123 0, /* tp_clear */
2124 0, /* tp_richcompare */
2125 0, /* tp_weaklistoffset */
2126 PyObject_SelfIter, /* tp_iter */
2127 (iternextfunc)elementiter_next, /* tp_iternext */
2128 0, /* tp_methods */
2129 0, /* tp_members */
2130 0, /* tp_getset */
2131 0, /* tp_base */
2132 0, /* tp_dict */
2133 0, /* tp_descr_get */
2134 0, /* tp_descr_set */
2135 0, /* tp_dictoffset */
2136 0, /* tp_init */
2137 0, /* tp_alloc */
2138 0, /* tp_new */
2139};
2140
2141
2142static PyObject *
2143create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2144{
2145 ElementIterObject *it;
2146 PyObject *star = NULL;
2147
2148 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2149 if (!it)
2150 return NULL;
2151 if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
2152 PyObject_GC_Del(it);
2153 return NULL;
2154 }
2155
2156 it->parent_stack->parent = NULL;
2157 it->parent_stack->child_index = 0;
2158 it->parent_stack->next = NULL;
2159
2160 if (PyUnicode_Check(tag))
2161 star = PyUnicode_FromString("*");
2162 else if (PyBytes_Check(tag))
2163 star = PyBytes_FromString("*");
2164
2165 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2166 tag = Py_None;
2167
2168 Py_XDECREF(star);
2169 it->sought_tag = tag;
2170 it->root_done = 0;
2171 it->gettext = gettext;
2172 it->root_element = self;
2173
2174 Py_INCREF(self);
2175 Py_INCREF(tag);
2176
2177 PyObject_GC_Track(it);
2178 return (PyObject *)it;
2179}
2180
2181
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002182/* ==================================================================== */
2183/* the tree builder type */
2184
2185typedef struct {
2186 PyObject_HEAD
2187
Eli Bendersky58d548d2012-05-29 15:45:16 +03002188 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002189
Antoine Pitrouee329312012-10-04 19:53:29 +02002190 PyObject *this; /* current node */
2191 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002192
Eli Bendersky58d548d2012-05-29 15:45:16 +03002193 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002194
Eli Bendersky58d548d2012-05-29 15:45:16 +03002195 PyObject *stack; /* element stack */
2196 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002197
Eli Bendersky48d358b2012-05-30 17:57:50 +03002198 PyObject *element_factory;
2199
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002200 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002201 PyObject *events; /* list of events, or NULL if not collecting */
2202 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2203 PyObject *end_event_obj;
2204 PyObject *start_ns_event_obj;
2205 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002206} TreeBuilderObject;
2207
Neal Norwitz227b5332006-03-22 09:28:35 +00002208static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002209
Christian Heimes90aa7642007-12-19 02:45:37 +00002210#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002211
2212/* -------------------------------------------------------------------- */
2213/* constructor and destructor */
2214
Eli Bendersky58d548d2012-05-29 15:45:16 +03002215static PyObject *
2216treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002217{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002218 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2219 if (t != NULL) {
2220 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002221
Eli Bendersky58d548d2012-05-29 15:45:16 +03002222 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002223 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002224 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002225 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002226
Eli Bendersky58d548d2012-05-29 15:45:16 +03002227 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002228 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002229 t->stack = PyList_New(20);
2230 if (!t->stack) {
2231 Py_DECREF(t->this);
2232 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002233 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002234 return NULL;
2235 }
2236 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002237
Eli Bendersky58d548d2012-05-29 15:45:16 +03002238 t->events = NULL;
2239 t->start_event_obj = t->end_event_obj = NULL;
2240 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2241 }
2242 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002243}
2244
Eli Bendersky58d548d2012-05-29 15:45:16 +03002245static int
2246treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002247{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002248 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002249 PyObject *element_factory = NULL;
2250 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002251 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002252
2253 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2254 &element_factory)) {
2255 return -1;
2256 }
2257
2258 if (element_factory) {
2259 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002260 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002261 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002262 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002263 }
2264
Eli Bendersky58d548d2012-05-29 15:45:16 +03002265 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002266}
2267
Eli Bendersky48d358b2012-05-30 17:57:50 +03002268static int
2269treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2270{
2271 Py_VISIT(self->root);
2272 Py_VISIT(self->this);
2273 Py_VISIT(self->last);
2274 Py_VISIT(self->data);
2275 Py_VISIT(self->stack);
2276 Py_VISIT(self->element_factory);
2277 return 0;
2278}
2279
2280static int
2281treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002282{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002283 Py_CLEAR(self->end_ns_event_obj);
2284 Py_CLEAR(self->start_ns_event_obj);
2285 Py_CLEAR(self->end_event_obj);
2286 Py_CLEAR(self->start_event_obj);
2287 Py_CLEAR(self->events);
2288 Py_CLEAR(self->stack);
2289 Py_CLEAR(self->data);
2290 Py_CLEAR(self->last);
2291 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002292 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002293 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002294 return 0;
2295}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002296
Eli Bendersky48d358b2012-05-30 17:57:50 +03002297static void
2298treebuilder_dealloc(TreeBuilderObject *self)
2299{
2300 PyObject_GC_UnTrack(self);
2301 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002302 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002303}
2304
2305/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002306/* helpers for handling of arbitrary element-like objects */
2307
2308static int
2309treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2310 PyObject **dest, _Py_Identifier *name)
2311{
2312 if (Element_CheckExact(element)) {
2313 Py_DECREF(JOIN_OBJ(*dest));
2314 *dest = JOIN_SET(data, PyList_CheckExact(data));
2315 return 0;
2316 }
2317 else {
2318 PyObject *joined = list_join(data);
2319 int r;
2320 if (joined == NULL)
2321 return -1;
2322 r = _PyObject_SetAttrId(element, name, joined);
2323 Py_DECREF(joined);
2324 return r;
2325 }
2326}
2327
2328/* These two functions steal a reference to data */
2329static int
2330treebuilder_set_element_text(PyObject *element, PyObject *data)
2331{
2332 _Py_IDENTIFIER(text);
2333 return treebuilder_set_element_text_or_tail(
2334 element, data, &((ElementObject *) element)->text, &PyId_text);
2335}
2336
2337static int
2338treebuilder_set_element_tail(PyObject *element, PyObject *data)
2339{
2340 _Py_IDENTIFIER(tail);
2341 return treebuilder_set_element_text_or_tail(
2342 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2343}
2344
2345static int
2346treebuilder_add_subelement(PyObject *element, PyObject *child)
2347{
2348 _Py_IDENTIFIER(append);
2349 if (Element_CheckExact(element)) {
2350 ElementObject *elem = (ElementObject *) element;
2351 return element_add_subelement(elem, child);
2352 }
2353 else {
2354 PyObject *res;
2355 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2356 if (res == NULL)
2357 return -1;
2358 Py_DECREF(res);
2359 return 0;
2360 }
2361}
2362
2363/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002364/* handlers */
2365
2366LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002367treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2368 PyObject* attrib)
2369{
2370 PyObject* node;
2371 PyObject* this;
2372
2373 if (self->data) {
2374 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002375 if (treebuilder_set_element_text(self->last, self->data))
2376 return NULL;
2377 }
2378 else {
2379 if (treebuilder_set_element_tail(self->last, self->data))
2380 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002381 }
2382 self->data = NULL;
2383 }
2384
Eli Bendersky48d358b2012-05-30 17:57:50 +03002385 if (self->element_factory) {
2386 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2387 } else {
2388 node = create_new_element(tag, attrib);
2389 }
2390 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002391 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002392 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002393
Antoine Pitrouee329312012-10-04 19:53:29 +02002394 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002395
2396 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002397 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002398 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002399 } else {
2400 if (self->root) {
2401 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002402 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002403 "multiple elements on top level"
2404 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002405 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002406 }
2407 Py_INCREF(node);
2408 self->root = node;
2409 }
2410
2411 if (self->index < PyList_GET_SIZE(self->stack)) {
2412 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002413 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002414 Py_INCREF(this);
2415 } else {
2416 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002417 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002418 }
2419 self->index++;
2420
2421 Py_DECREF(this);
2422 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002423 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002424
2425 Py_DECREF(self->last);
2426 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002427 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002428
2429 if (self->start_event_obj) {
2430 PyObject* res;
2431 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002432 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002433 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002434 PyList_Append(self->events, res);
2435 Py_DECREF(res);
2436 } else
2437 PyErr_Clear(); /* FIXME: propagate error */
2438 }
2439
2440 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002441
2442 error:
2443 Py_DECREF(node);
2444 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002445}
2446
2447LOCAL(PyObject*)
2448treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2449{
2450 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002451 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002452 /* ignore calls to data before the first call to start */
2453 Py_RETURN_NONE;
2454 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002455 /* store the first item as is */
2456 Py_INCREF(data); self->data = data;
2457 } else {
2458 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002459 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2460 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002461 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002462 /* expat often generates single character data sections; handle
2463 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002464 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2465 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002466 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002467 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002468 } else if (PyList_CheckExact(self->data)) {
2469 if (PyList_Append(self->data, data) < 0)
2470 return NULL;
2471 } else {
2472 PyObject* list = PyList_New(2);
2473 if (!list)
2474 return NULL;
2475 PyList_SET_ITEM(list, 0, self->data);
2476 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2477 self->data = list;
2478 }
2479 }
2480
2481 Py_RETURN_NONE;
2482}
2483
2484LOCAL(PyObject*)
2485treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2486{
2487 PyObject* item;
2488
2489 if (self->data) {
2490 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002491 if (treebuilder_set_element_text(self->last, self->data))
2492 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002493 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002494 if (treebuilder_set_element_tail(self->last, self->data))
2495 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002496 }
2497 self->data = NULL;
2498 }
2499
2500 if (self->index == 0) {
2501 PyErr_SetString(
2502 PyExc_IndexError,
2503 "pop from empty stack"
2504 );
2505 return NULL;
2506 }
2507
2508 self->index--;
2509
2510 item = PyList_GET_ITEM(self->stack, self->index);
2511 Py_INCREF(item);
2512
2513 Py_DECREF(self->last);
2514
Antoine Pitrouee329312012-10-04 19:53:29 +02002515 self->last = self->this;
2516 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002517
2518 if (self->end_event_obj) {
2519 PyObject* res;
2520 PyObject* action = self->end_event_obj;
2521 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002522 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002523 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002524 PyList_Append(self->events, res);
2525 Py_DECREF(res);
2526 } else
2527 PyErr_Clear(); /* FIXME: propagate error */
2528 }
2529
2530 Py_INCREF(self->last);
2531 return (PyObject*) self->last;
2532}
2533
2534LOCAL(void)
2535treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002536 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002537{
2538 PyObject* res;
2539 PyObject* action;
2540 PyObject* parcel;
2541
2542 if (!self->events)
2543 return;
2544
2545 if (start) {
2546 if (!self->start_ns_event_obj)
2547 return;
2548 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002549 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002550 if (!parcel)
2551 return;
2552 Py_INCREF(action);
2553 } else {
2554 if (!self->end_ns_event_obj)
2555 return;
2556 action = self->end_ns_event_obj;
2557 Py_INCREF(action);
2558 parcel = Py_None;
2559 Py_INCREF(parcel);
2560 }
2561
2562 res = PyTuple_New(2);
2563
2564 if (res) {
2565 PyTuple_SET_ITEM(res, 0, action);
2566 PyTuple_SET_ITEM(res, 1, parcel);
2567 PyList_Append(self->events, res);
2568 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002569 }
2570 else {
2571 Py_DECREF(action);
2572 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002573 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002574 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002575}
2576
2577/* -------------------------------------------------------------------- */
2578/* methods (in alphabetical order) */
2579
2580static PyObject*
2581treebuilder_data(TreeBuilderObject* self, PyObject* args)
2582{
2583 PyObject* data;
2584 if (!PyArg_ParseTuple(args, "O:data", &data))
2585 return NULL;
2586
2587 return treebuilder_handle_data(self, data);
2588}
2589
2590static PyObject*
2591treebuilder_end(TreeBuilderObject* self, PyObject* args)
2592{
2593 PyObject* tag;
2594 if (!PyArg_ParseTuple(args, "O:end", &tag))
2595 return NULL;
2596
2597 return treebuilder_handle_end(self, tag);
2598}
2599
2600LOCAL(PyObject*)
2601treebuilder_done(TreeBuilderObject* self)
2602{
2603 PyObject* res;
2604
2605 /* FIXME: check stack size? */
2606
2607 if (self->root)
2608 res = self->root;
2609 else
2610 res = Py_None;
2611
2612 Py_INCREF(res);
2613 return res;
2614}
2615
2616static PyObject*
2617treebuilder_close(TreeBuilderObject* self, PyObject* args)
2618{
2619 if (!PyArg_ParseTuple(args, ":close"))
2620 return NULL;
2621
2622 return treebuilder_done(self);
2623}
2624
2625static PyObject*
2626treebuilder_start(TreeBuilderObject* self, PyObject* args)
2627{
2628 PyObject* tag;
2629 PyObject* attrib = Py_None;
2630 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2631 return NULL;
2632
2633 return treebuilder_handle_start(self, tag, attrib);
2634}
2635
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002636static PyMethodDef treebuilder_methods[] = {
2637 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2638 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2639 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002640 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2641 {NULL, NULL}
2642};
2643
Neal Norwitz227b5332006-03-22 09:28:35 +00002644static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002645 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002646 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002647 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002648 (destructor)treebuilder_dealloc, /* tp_dealloc */
2649 0, /* tp_print */
2650 0, /* tp_getattr */
2651 0, /* tp_setattr */
2652 0, /* tp_reserved */
2653 0, /* tp_repr */
2654 0, /* tp_as_number */
2655 0, /* tp_as_sequence */
2656 0, /* tp_as_mapping */
2657 0, /* tp_hash */
2658 0, /* tp_call */
2659 0, /* tp_str */
2660 0, /* tp_getattro */
2661 0, /* tp_setattro */
2662 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002663 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2664 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002665 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002666 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2667 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002668 0, /* tp_richcompare */
2669 0, /* tp_weaklistoffset */
2670 0, /* tp_iter */
2671 0, /* tp_iternext */
2672 treebuilder_methods, /* tp_methods */
2673 0, /* tp_members */
2674 0, /* tp_getset */
2675 0, /* tp_base */
2676 0, /* tp_dict */
2677 0, /* tp_descr_get */
2678 0, /* tp_descr_set */
2679 0, /* tp_dictoffset */
2680 (initproc)treebuilder_init, /* tp_init */
2681 PyType_GenericAlloc, /* tp_alloc */
2682 treebuilder_new, /* tp_new */
2683 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002684};
2685
2686/* ==================================================================== */
2687/* the expat interface */
2688
2689#if defined(USE_EXPAT)
2690
2691#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002692#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002693static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002694#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002695
Eli Bendersky52467b12012-06-01 07:13:08 +03002696static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2697 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2698
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002699typedef struct {
2700 PyObject_HEAD
2701
2702 XML_Parser parser;
2703
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002704 PyObject *target;
2705 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002707 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002708
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002709 PyObject *handle_start;
2710 PyObject *handle_data;
2711 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002712
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002713 PyObject *handle_comment;
2714 PyObject *handle_pi;
2715 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002717 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002718
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002719} XMLParserObject;
2720
Neal Norwitz227b5332006-03-22 09:28:35 +00002721static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002723#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2724
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002725/* helpers */
2726
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002727LOCAL(PyObject*)
2728makeuniversal(XMLParserObject* self, const char* string)
2729{
2730 /* convert a UTF-8 tag/attribute name from the expat parser
2731 to a universal name string */
2732
Antoine Pitrouc1948842012-10-01 23:40:37 +02002733 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734 PyObject* key;
2735 PyObject* value;
2736
2737 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002738 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739 if (!key)
2740 return NULL;
2741
2742 value = PyDict_GetItem(self->names, key);
2743
2744 if (value) {
2745 Py_INCREF(value);
2746 } else {
2747 /* new name. convert to universal name, and decode as
2748 necessary */
2749
2750 PyObject* tag;
2751 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002752 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753
2754 /* look for namespace separator */
2755 for (i = 0; i < size; i++)
2756 if (string[i] == '}')
2757 break;
2758 if (i != size) {
2759 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002760 tag = PyBytes_FromStringAndSize(NULL, size+1);
2761 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002762 p[0] = '{';
2763 memcpy(p+1, string, size);
2764 size++;
2765 } else {
2766 /* plain name; use key as tag */
2767 Py_INCREF(key);
2768 tag = key;
2769 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002771 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002772 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002773 value = PyUnicode_DecodeUTF8(p, size, "strict");
2774 Py_DECREF(tag);
2775 if (!value) {
2776 Py_DECREF(key);
2777 return NULL;
2778 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002779
2780 /* add to names dictionary */
2781 if (PyDict_SetItem(self->names, key, value) < 0) {
2782 Py_DECREF(key);
2783 Py_DECREF(value);
2784 return NULL;
2785 }
2786 }
2787
2788 Py_DECREF(key);
2789 return value;
2790}
2791
Eli Bendersky5b77d812012-03-16 08:20:05 +02002792/* Set the ParseError exception with the given parameters.
2793 * If message is not NULL, it's used as the error string. Otherwise, the
2794 * message string is the default for the given error_code.
2795*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002796static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002797expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002798{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002799 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002800
Victor Stinner499dfcf2011-03-21 13:26:24 +01002801 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002802 message ? message : EXPAT(ErrorString)(error_code),
2803 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002804 if (errmsg == NULL)
2805 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002806
Victor Stinner499dfcf2011-03-21 13:26:24 +01002807 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2808 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002809 if (!error)
2810 return;
2811
Eli Bendersky5b77d812012-03-16 08:20:05 +02002812 /* Add code and position attributes */
2813 code = PyLong_FromLong((long)error_code);
2814 if (!code) {
2815 Py_DECREF(error);
2816 return;
2817 }
2818 if (PyObject_SetAttrString(error, "code", code) == -1) {
2819 Py_DECREF(error);
2820 Py_DECREF(code);
2821 return;
2822 }
2823 Py_DECREF(code);
2824
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002825 position = Py_BuildValue("(ii)", line, column);
2826 if (!position) {
2827 Py_DECREF(error);
2828 return;
2829 }
2830 if (PyObject_SetAttrString(error, "position", position) == -1) {
2831 Py_DECREF(error);
2832 Py_DECREF(position);
2833 return;
2834 }
2835 Py_DECREF(position);
2836
2837 PyErr_SetObject(elementtree_parseerror_obj, error);
2838 Py_DECREF(error);
2839}
2840
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002841/* -------------------------------------------------------------------- */
2842/* handlers */
2843
2844static void
2845expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2846 int data_len)
2847{
2848 PyObject* key;
2849 PyObject* value;
2850 PyObject* res;
2851
2852 if (data_len < 2 || data_in[0] != '&')
2853 return;
2854
Neal Norwitz0269b912007-08-08 06:56:02 +00002855 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002856 if (!key)
2857 return;
2858
2859 value = PyDict_GetItem(self->entity, key);
2860
2861 if (value) {
2862 if (TreeBuilder_CheckExact(self->target))
2863 res = treebuilder_handle_data(
2864 (TreeBuilderObject*) self->target, value
2865 );
2866 else if (self->handle_data)
2867 res = PyObject_CallFunction(self->handle_data, "O", value);
2868 else
2869 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002870 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002871 } else if (!PyErr_Occurred()) {
2872 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002873 char message[128] = "undefined entity ";
2874 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002875 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002876 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002877 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002878 EXPAT(GetErrorColumnNumber)(self->parser),
2879 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002880 );
2881 }
2882
2883 Py_DECREF(key);
2884}
2885
2886static void
2887expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2888 const XML_Char **attrib_in)
2889{
2890 PyObject* res;
2891 PyObject* tag;
2892 PyObject* attrib;
2893 int ok;
2894
2895 /* tag name */
2896 tag = makeuniversal(self, tag_in);
2897 if (!tag)
2898 return; /* parser will look for errors */
2899
2900 /* attributes */
2901 if (attrib_in[0]) {
2902 attrib = PyDict_New();
2903 if (!attrib)
2904 return;
2905 while (attrib_in[0] && attrib_in[1]) {
2906 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002907 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002908 if (!key || !value) {
2909 Py_XDECREF(value);
2910 Py_XDECREF(key);
2911 Py_DECREF(attrib);
2912 return;
2913 }
2914 ok = PyDict_SetItem(attrib, key, value);
2915 Py_DECREF(value);
2916 Py_DECREF(key);
2917 if (ok < 0) {
2918 Py_DECREF(attrib);
2919 return;
2920 }
2921 attrib_in += 2;
2922 }
2923 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002924 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002925 attrib = PyDict_New();
2926 if (!attrib)
2927 return;
2928 }
2929
2930 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002931 /* shortcut */
2932 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2933 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002934 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002935 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002936 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002937 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002938 res = NULL;
2939
2940 Py_DECREF(tag);
2941 Py_DECREF(attrib);
2942
2943 Py_XDECREF(res);
2944}
2945
2946static void
2947expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2948 int data_len)
2949{
2950 PyObject* data;
2951 PyObject* res;
2952
Neal Norwitz0269b912007-08-08 06:56:02 +00002953 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002954 if (!data)
2955 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002956
2957 if (TreeBuilder_CheckExact(self->target))
2958 /* shortcut */
2959 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2960 else if (self->handle_data)
2961 res = PyObject_CallFunction(self->handle_data, "O", data);
2962 else
2963 res = NULL;
2964
2965 Py_DECREF(data);
2966
2967 Py_XDECREF(res);
2968}
2969
2970static void
2971expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2972{
2973 PyObject* tag;
2974 PyObject* res = NULL;
2975
2976 if (TreeBuilder_CheckExact(self->target))
2977 /* shortcut */
2978 /* the standard tree builder doesn't look at the end tag */
2979 res = treebuilder_handle_end(
2980 (TreeBuilderObject*) self->target, Py_None
2981 );
2982 else if (self->handle_end) {
2983 tag = makeuniversal(self, tag_in);
2984 if (tag) {
2985 res = PyObject_CallFunction(self->handle_end, "O", tag);
2986 Py_DECREF(tag);
2987 }
2988 }
2989
2990 Py_XDECREF(res);
2991}
2992
2993static void
2994expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2995 const XML_Char *uri)
2996{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002997 PyObject* sprefix = NULL;
2998 PyObject* suri = NULL;
2999
3000 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
3001 if (!suri)
3002 return;
3003
3004 if (prefix)
3005 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3006 else
3007 sprefix = PyUnicode_FromString("");
3008 if (!sprefix) {
3009 Py_DECREF(suri);
3010 return;
3011 }
3012
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003013 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003014 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003015 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003016
3017 Py_DECREF(sprefix);
3018 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003019}
3020
3021static void
3022expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3023{
3024 treebuilder_handle_namespace(
3025 (TreeBuilderObject*) self->target, 0, NULL, NULL
3026 );
3027}
3028
3029static void
3030expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3031{
3032 PyObject* comment;
3033 PyObject* res;
3034
3035 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003036 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003037 if (comment) {
3038 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3039 Py_XDECREF(res);
3040 Py_DECREF(comment);
3041 }
3042 }
3043}
3044
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003045static void
3046expat_start_doctype_handler(XMLParserObject *self,
3047 const XML_Char *doctype_name,
3048 const XML_Char *sysid,
3049 const XML_Char *pubid,
3050 int has_internal_subset)
3051{
3052 PyObject *self_pyobj = (PyObject *)self;
3053 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3054 PyObject *parser_doctype = NULL;
3055 PyObject *res = NULL;
3056
3057 doctype_name_obj = makeuniversal(self, doctype_name);
3058 if (!doctype_name_obj)
3059 return;
3060
3061 if (sysid) {
3062 sysid_obj = makeuniversal(self, sysid);
3063 if (!sysid_obj) {
3064 Py_DECREF(doctype_name_obj);
3065 return;
3066 }
3067 } else {
3068 Py_INCREF(Py_None);
3069 sysid_obj = Py_None;
3070 }
3071
3072 if (pubid) {
3073 pubid_obj = makeuniversal(self, pubid);
3074 if (!pubid_obj) {
3075 Py_DECREF(doctype_name_obj);
3076 Py_DECREF(sysid_obj);
3077 return;
3078 }
3079 } else {
3080 Py_INCREF(Py_None);
3081 pubid_obj = Py_None;
3082 }
3083
3084 /* If the target has a handler for doctype, call it. */
3085 if (self->handle_doctype) {
3086 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3087 doctype_name_obj, pubid_obj, sysid_obj);
3088 Py_CLEAR(res);
3089 }
3090
3091 /* Now see if the parser itself has a doctype method. If yes and it's
3092 * a subclass, call it but warn about deprecation. If it's not a subclass
3093 * (i.e. vanilla XMLParser), do nothing.
3094 */
3095 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3096 if (parser_doctype) {
3097 if (!XMLParser_CheckExact(self_pyobj)) {
3098 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3099 "This method of XMLParser is deprecated. Define"
3100 " doctype() method on the TreeBuilder target.",
3101 1) < 0) {
3102 goto clear;
3103 }
3104 res = PyObject_CallFunction(parser_doctype, "OOO",
3105 doctype_name_obj, pubid_obj, sysid_obj);
3106 Py_CLEAR(res);
3107 }
3108 }
3109
3110clear:
3111 Py_XDECREF(parser_doctype);
3112 Py_DECREF(doctype_name_obj);
3113 Py_DECREF(pubid_obj);
3114 Py_DECREF(sysid_obj);
3115}
3116
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003117static void
3118expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3119 const XML_Char* data_in)
3120{
3121 PyObject* target;
3122 PyObject* data;
3123 PyObject* res;
3124
3125 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003126 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3127 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003128 if (target && data) {
3129 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3130 Py_XDECREF(res);
3131 Py_DECREF(data);
3132 Py_DECREF(target);
3133 } else {
3134 Py_XDECREF(data);
3135 Py_XDECREF(target);
3136 }
3137 }
3138}
3139
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003140static int
3141expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
3142 XML_Encoding *info)
3143{
3144 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003145 unsigned char s[256];
3146 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003147 void *data;
3148 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003149
3150 memset(info, 0, sizeof(XML_Encoding));
3151
3152 for (i = 0; i < 256; i++)
3153 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003154
Fredrik Lundhc3389992005-12-25 11:40:19 +00003155 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003156 if (!u)
3157 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003158 if (PyUnicode_READY(u))
3159 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003160
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003161 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003162 Py_DECREF(u);
3163 return XML_STATUS_ERROR;
3164 }
3165
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003166 kind = PyUnicode_KIND(u);
3167 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003168 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003169 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
3170 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
3171 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003172 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003173 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003174 }
3175
3176 Py_DECREF(u);
3177
3178 return XML_STATUS_OK;
3179}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003180
3181/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003182
Eli Bendersky52467b12012-06-01 07:13:08 +03003183static PyObject *
3184xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003185{
Eli Bendersky52467b12012-06-01 07:13:08 +03003186 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3187 if (self) {
3188 self->parser = NULL;
3189 self->target = self->entity = self->names = NULL;
3190 self->handle_start = self->handle_data = self->handle_end = NULL;
3191 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003192 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003193 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003194 return (PyObject *)self;
3195}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003196
Eli Bendersky52467b12012-06-01 07:13:08 +03003197static int
3198xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3199{
3200 XMLParserObject *self_xp = (XMLParserObject *)self;
3201 PyObject *target = NULL, *html = NULL;
3202 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003203 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003204
Eli Bendersky52467b12012-06-01 07:13:08 +03003205 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3206 &html, &target, &encoding)) {
3207 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003208 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003209
Eli Bendersky52467b12012-06-01 07:13:08 +03003210 self_xp->entity = PyDict_New();
3211 if (!self_xp->entity)
3212 return -1;
3213
3214 self_xp->names = PyDict_New();
3215 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003216 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003217 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003218 }
3219
Eli Bendersky52467b12012-06-01 07:13:08 +03003220 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3221 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003222 Py_CLEAR(self_xp->entity);
3223 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003224 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003225 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003226 }
3227
Eli Bendersky52467b12012-06-01 07:13:08 +03003228 if (target) {
3229 Py_INCREF(target);
3230 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003231 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003232 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003233 Py_CLEAR(self_xp->entity);
3234 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003235 EXPAT(ParserFree)(self_xp->parser);
3236 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003237 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003238 }
3239 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003240
Eli Bendersky52467b12012-06-01 07:13:08 +03003241 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3242 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3243 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3244 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3245 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3246 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003247 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003248
3249 PyErr_Clear();
Eli Bendersky52467b12012-06-01 07:13:08 +03003250
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003251 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003252 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003253 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003254 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003255 (XML_StartElementHandler) expat_start_handler,
3256 (XML_EndElementHandler) expat_end_handler
3257 );
3258 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003259 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003260 (XML_DefaultHandler) expat_default_handler
3261 );
3262 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003263 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003264 (XML_CharacterDataHandler) expat_data_handler
3265 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003266 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003267 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003268 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003269 (XML_CommentHandler) expat_comment_handler
3270 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003271 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003272 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003273 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274 (XML_ProcessingInstructionHandler) expat_pi_handler
3275 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003276 EXPAT(SetStartDoctypeDeclHandler)(
3277 self_xp->parser,
3278 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3279 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003280 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003281 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
3283 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284
Eli Bendersky52467b12012-06-01 07:13:08 +03003285 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286}
3287
Eli Bendersky52467b12012-06-01 07:13:08 +03003288static int
3289xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3290{
3291 Py_VISIT(self->handle_close);
3292 Py_VISIT(self->handle_pi);
3293 Py_VISIT(self->handle_comment);
3294 Py_VISIT(self->handle_end);
3295 Py_VISIT(self->handle_data);
3296 Py_VISIT(self->handle_start);
3297
3298 Py_VISIT(self->target);
3299 Py_VISIT(self->entity);
3300 Py_VISIT(self->names);
3301
3302 return 0;
3303}
3304
3305static int
3306xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003307{
3308 EXPAT(ParserFree)(self->parser);
3309
Antoine Pitrouc1948842012-10-01 23:40:37 +02003310 Py_CLEAR(self->handle_close);
3311 Py_CLEAR(self->handle_pi);
3312 Py_CLEAR(self->handle_comment);
3313 Py_CLEAR(self->handle_end);
3314 Py_CLEAR(self->handle_data);
3315 Py_CLEAR(self->handle_start);
3316 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003317
Antoine Pitrouc1948842012-10-01 23:40:37 +02003318 Py_CLEAR(self->target);
3319 Py_CLEAR(self->entity);
3320 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003321
Eli Bendersky52467b12012-06-01 07:13:08 +03003322 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003323}
3324
Eli Bendersky52467b12012-06-01 07:13:08 +03003325static void
3326xmlparser_dealloc(XMLParserObject* self)
3327{
3328 PyObject_GC_UnTrack(self);
3329 xmlparser_gc_clear(self);
3330 Py_TYPE(self)->tp_free((PyObject *)self);
3331}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003332
3333LOCAL(PyObject*)
3334expat_parse(XMLParserObject* self, char* data, int data_len, int final)
3335{
3336 int ok;
3337
3338 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3339
3340 if (PyErr_Occurred())
3341 return NULL;
3342
3343 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003344 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003345 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003347 EXPAT(GetErrorColumnNumber)(self->parser),
3348 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003349 );
3350 return NULL;
3351 }
3352
3353 Py_RETURN_NONE;
3354}
3355
3356static PyObject*
3357xmlparser_close(XMLParserObject* self, PyObject* args)
3358{
3359 /* end feeding data to parser */
3360
3361 PyObject* res;
3362 if (!PyArg_ParseTuple(args, ":close"))
3363 return NULL;
3364
3365 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003366 if (!res)
3367 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003368
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003369 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003370 Py_DECREF(res);
3371 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003372 } if (self->handle_close) {
3373 Py_DECREF(res);
3374 return PyObject_CallFunction(self->handle_close, "");
3375 } else
3376 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003377}
3378
3379static PyObject*
3380xmlparser_feed(XMLParserObject* self, PyObject* args)
3381{
3382 /* feed data to parser */
3383
3384 char* data;
3385 int data_len;
3386 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
3387 return NULL;
3388
3389 return expat_parse(self, data, data_len, 0);
3390}
3391
3392static PyObject*
3393xmlparser_parse(XMLParserObject* self, PyObject* args)
3394{
3395 /* (internal) parse until end of input stream */
3396
3397 PyObject* reader;
3398 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003399 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003400 PyObject* res;
3401
3402 PyObject* fileobj;
3403 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3404 return NULL;
3405
3406 reader = PyObject_GetAttrString(fileobj, "read");
3407 if (!reader)
3408 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003409
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003410 /* read from open file object */
3411 for (;;) {
3412
3413 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3414
3415 if (!buffer) {
3416 /* read failed (e.g. due to KeyboardInterrupt) */
3417 Py_DECREF(reader);
3418 return NULL;
3419 }
3420
Eli Benderskyf996e772012-03-16 05:53:30 +02003421 if (PyUnicode_CheckExact(buffer)) {
3422 /* A unicode object is encoded into bytes using UTF-8 */
3423 if (PyUnicode_GET_SIZE(buffer) == 0) {
3424 Py_DECREF(buffer);
3425 break;
3426 }
3427 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003428 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003429 if (!temp) {
3430 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003431 Py_DECREF(reader);
3432 return NULL;
3433 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003434 buffer = temp;
3435 }
3436 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003437 Py_DECREF(buffer);
3438 break;
3439 }
3440
3441 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003442 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003443 );
3444
3445 Py_DECREF(buffer);
3446
3447 if (!res) {
3448 Py_DECREF(reader);
3449 return NULL;
3450 }
3451 Py_DECREF(res);
3452
3453 }
3454
3455 Py_DECREF(reader);
3456
3457 res = expat_parse(self, "", 0, 1);
3458
3459 if (res && TreeBuilder_CheckExact(self->target)) {
3460 Py_DECREF(res);
3461 return treebuilder_done((TreeBuilderObject*) self->target);
3462 }
3463
3464 return res;
3465}
3466
3467static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003468xmlparser_doctype(XMLParserObject *self, PyObject *args)
3469{
3470 Py_RETURN_NONE;
3471}
3472
3473static PyObject*
3474xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003475{
3476 /* activate element event reporting */
3477
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003478 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003479 TreeBuilderObject* target;
3480
3481 PyObject* events; /* event collector */
3482 PyObject* event_set = Py_None;
3483 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
3484 &event_set))
3485 return NULL;
3486
3487 if (!TreeBuilder_CheckExact(self->target)) {
3488 PyErr_SetString(
3489 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003490 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003491 "targets"
3492 );
3493 return NULL;
3494 }
3495
3496 target = (TreeBuilderObject*) self->target;
3497
3498 Py_INCREF(events);
3499 Py_XDECREF(target->events);
3500 target->events = events;
3501
3502 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003503 Py_CLEAR(target->start_event_obj);
3504 Py_CLEAR(target->end_event_obj);
3505 Py_CLEAR(target->start_ns_event_obj);
3506 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003507
3508 if (event_set == Py_None) {
3509 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003510 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003511 Py_RETURN_NONE;
3512 }
3513
3514 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3515 goto error;
3516
3517 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3518 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3519 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003520 if (PyUnicode_Check(item)) {
3521 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003522 if (event == NULL)
3523 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003524 } else if (PyBytes_Check(item))
3525 event = PyBytes_AS_STRING(item);
3526 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003527 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003528 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003529 if (strcmp(event, "start") == 0) {
3530 Py_INCREF(item);
3531 target->start_event_obj = item;
3532 } else if (strcmp(event, "end") == 0) {
3533 Py_INCREF(item);
3534 Py_XDECREF(target->end_event_obj);
3535 target->end_event_obj = item;
3536 } else if (strcmp(event, "start-ns") == 0) {
3537 Py_INCREF(item);
3538 Py_XDECREF(target->start_ns_event_obj);
3539 target->start_ns_event_obj = item;
3540 EXPAT(SetNamespaceDeclHandler)(
3541 self->parser,
3542 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3543 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3544 );
3545 } else if (strcmp(event, "end-ns") == 0) {
3546 Py_INCREF(item);
3547 Py_XDECREF(target->end_ns_event_obj);
3548 target->end_ns_event_obj = item;
3549 EXPAT(SetNamespaceDeclHandler)(
3550 self->parser,
3551 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3552 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3553 );
3554 } else {
3555 PyErr_Format(
3556 PyExc_ValueError,
3557 "unknown event '%s'", event
3558 );
3559 return NULL;
3560 }
3561 }
3562
3563 Py_RETURN_NONE;
3564
3565 error:
3566 PyErr_SetString(
3567 PyExc_TypeError,
3568 "invalid event tuple"
3569 );
3570 return NULL;
3571}
3572
3573static PyMethodDef xmlparser_methods[] = {
3574 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
3575 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3576 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3577 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003578 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003579 {NULL, NULL}
3580};
3581
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003582static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003583xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003584{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003585 if (PyUnicode_Check(nameobj)) {
3586 PyObject* res;
3587 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3588 res = self->entity;
3589 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3590 res = self->target;
3591 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3592 return PyUnicode_FromFormat(
3593 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003594 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003595 }
3596 else
3597 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003598
Alexander Belopolskye239d232010-12-08 23:31:48 +00003599 Py_INCREF(res);
3600 return res;
3601 }
3602 generic:
3603 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003604}
3605
Neal Norwitz227b5332006-03-22 09:28:35 +00003606static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003607 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003608 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003609 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003610 (destructor)xmlparser_dealloc, /* tp_dealloc */
3611 0, /* tp_print */
3612 0, /* tp_getattr */
3613 0, /* tp_setattr */
3614 0, /* tp_reserved */
3615 0, /* tp_repr */
3616 0, /* tp_as_number */
3617 0, /* tp_as_sequence */
3618 0, /* tp_as_mapping */
3619 0, /* tp_hash */
3620 0, /* tp_call */
3621 0, /* tp_str */
3622 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3623 0, /* tp_setattro */
3624 0, /* tp_as_buffer */
3625 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3626 /* tp_flags */
3627 0, /* tp_doc */
3628 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3629 (inquiry)xmlparser_gc_clear, /* tp_clear */
3630 0, /* tp_richcompare */
3631 0, /* tp_weaklistoffset */
3632 0, /* tp_iter */
3633 0, /* tp_iternext */
3634 xmlparser_methods, /* tp_methods */
3635 0, /* tp_members */
3636 0, /* tp_getset */
3637 0, /* tp_base */
3638 0, /* tp_dict */
3639 0, /* tp_descr_get */
3640 0, /* tp_descr_set */
3641 0, /* tp_dictoffset */
3642 (initproc)xmlparser_init, /* tp_init */
3643 PyType_GenericAlloc, /* tp_alloc */
3644 xmlparser_new, /* tp_new */
3645 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003646};
3647
3648#endif
3649
3650/* ==================================================================== */
3651/* python module interface */
3652
3653static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003654 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003655 {NULL, NULL}
3656};
3657
Martin v. Löwis1a214512008-06-11 05:26:20 +00003658
3659static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003660 PyModuleDef_HEAD_INIT,
3661 "_elementtree",
3662 NULL,
3663 -1,
3664 _functions,
3665 NULL,
3666 NULL,
3667 NULL,
3668 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003669};
3670
Neal Norwitzf6657e62006-12-28 04:47:50 +00003671PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003672PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003673{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003674 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003675
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003676 /* Initialize object types */
3677 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003678 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003679 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003680 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003682 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003683 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003684#endif
3685
Martin v. Löwis1a214512008-06-11 05:26:20 +00003686 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003687 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003688 return NULL;
3689
Eli Bendersky828efde2012-04-05 05:40:58 +03003690 if (!(temp = PyImport_ImportModule("copy")))
3691 return NULL;
3692 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3693 Py_XDECREF(temp);
3694
3695 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3696 return NULL;
3697
Eli Bendersky20d41742012-06-01 09:48:37 +03003698 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003699 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3700 if (expat_capi) {
3701 /* check that it's usable */
3702 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3703 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3704 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3705 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003706 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003707 PyErr_SetString(PyExc_ImportError,
3708 "pyexpat version is incompatible");
3709 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003710 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003711 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003712 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003713 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003714
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003715 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003716 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003717 );
3718 Py_INCREF(elementtree_parseerror_obj);
3719 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3720
Eli Bendersky092af1f2012-03-04 07:14:03 +02003721 Py_INCREF((PyObject *)&Element_Type);
3722 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3723
Eli Bendersky58d548d2012-05-29 15:45:16 +03003724 Py_INCREF((PyObject *)&TreeBuilder_Type);
3725 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3726
Eli Bendersky52467b12012-06-01 07:13:08 +03003727#if defined(USE_EXPAT)
3728 Py_INCREF((PyObject *)&XMLParser_Type);
3729 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3730#endif
3731
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003732 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003733}