blob: 7bc18808faa55eb99f8b7f09cc1b235cf8da287b [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000053#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054
55/* -------------------------------------------------------------------- */
56/* configuration */
57
58/* Leave defined to include the expat-based XMLParser type */
59#define USE_EXPAT
60
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* An element can hold this many children without extra memory
62 allocations. */
63#define STATIC_CHILDREN 4
64
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
69
70/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010071 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000072 that the number of children should be an even number, at least on
73 32-bit platforms. */
74
75/* -------------------------------------------------------------------- */
76
/* Allocation tracing hooks.  Change the condition below to 1 to keep a
   running byte total and print it on every ALLOC/RELEASE; as shipped,
   both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
87
/* LOCAL(type) declares a file-private helper returning `type`.  With the
   Microsoft compiler it additionally requests inlining and the fastcall
   convention; elsewhere it is plain `static`. */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
94
/* The text and tail slots of an element are tagged pointers: the lowest
   bit of the stored address carries a 'join' flag.  Every use of those
   slots as object pointers must therefore go through JOIN_OBJ.  See the
   comments in the ElementObject definition for what the flag means.

   JOIN_GET(p)       -> the flag bit of p (0 or 1)
   JOIN_SET(p, flag) -> p with its flag bit replaced by `flag`
   JOIN_OBJ(p)       -> p with the flag bit stripped, as a PyObject* */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & (Py_uintptr_t) 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t) 1))
102
103/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000104static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000105static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000106static PyObject* elementtree_iter_obj;
107static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000108static PyObject* elementpath_obj;
109
110/* helpers */
111
112LOCAL(PyObject*)
113deepcopy(PyObject* object, PyObject* memo)
114{
115 /* do a deep copy of the given object */
116
117 PyObject* args;
118 PyObject* result;
119
120 if (!elementtree_deepcopy_obj) {
121 PyErr_SetString(
122 PyExc_RuntimeError,
123 "deepcopy helper not found"
124 );
125 return NULL;
126 }
127
128 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000129 if (!args)
130 return NULL;
131
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
133 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
134
135 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
136
137 Py_DECREF(args);
138
139 return result;
140}
141
142LOCAL(PyObject*)
143list_join(PyObject* list)
144{
145 /* join list elements (destroying the list in the process) */
146
147 PyObject* joiner;
148 PyObject* function;
149 PyObject* args;
150 PyObject* result;
151
152 switch (PyList_GET_SIZE(list)) {
153 case 0:
154 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000155 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000156 case 1:
157 result = PyList_GET_ITEM(list, 0);
158 Py_INCREF(result);
159 Py_DECREF(list);
160 return result;
161 }
162
163 /* two or more elements: slice out a suitable separator from the
164 first member, and use that to join the entire list */
165
166 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
167 if (!joiner)
168 return NULL;
169
170 function = PyObject_GetAttrString(joiner, "join");
171 if (!function) {
172 Py_DECREF(joiner);
173 return NULL;
174 }
175
176 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000177 if (!args)
178 return NULL;
179
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000180 PyTuple_SET_ITEM(args, 0, list);
181
182 result = PyObject_CallObject(function, args);
183
184 Py_DECREF(args); /* also removes list */
185 Py_DECREF(function);
186 Py_DECREF(joiner);
187
188 return result;
189}
190
Eli Bendersky48d358b2012-05-30 17:57:50 +0300191/* Is the given object an empty dictionary?
192*/
193static int
194is_empty_dict(PyObject *obj)
195{
196 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
197}
198
199
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200201/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202
203typedef struct {
204
205 /* attributes (a dictionary object), or None if no attributes */
206 PyObject* attrib;
207
208 /* child elements */
209 int length; /* actual number of items */
210 int allocated; /* allocated items */
211
212 /* this either points to _children or to a malloced buffer */
213 PyObject* *children;
214
215 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100216
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000217} ElementObjectExtra;
218
219typedef struct {
220 PyObject_HEAD
221
222 /* element tag (a string). */
223 PyObject* tag;
224
225 /* text before first child. note that this is a tagged pointer;
226 use JOIN_OBJ to get the object pointer. the join flag is used
227 to distinguish lists created by the tree builder from lists
228 assigned to the attribute by application code; the former
229 should be joined before being returned to the user, the latter
230 should be left intact. */
231 PyObject* text;
232
233 /* text after this element, in parent. note that this is a tagged
234 pointer; use JOIN_OBJ to get the object pointer. */
235 PyObject* tail;
236
237 ElementObjectExtra* extra;
238
Eli Benderskyebf37a22012-04-03 22:02:37 +0300239 PyObject *weakreflist; /* For tp_weaklistoffset */
240
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000241} ElementObject;
242
Neal Norwitz227b5332006-03-22 09:28:35 +0000243static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000244
Christian Heimes90aa7642007-12-19 02:45:37 +0000245#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246
247/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200248/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249
250LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200251create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000252{
253 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
254 if (!self->extra)
255 return -1;
256
257 if (!attrib)
258 attrib = Py_None;
259
260 Py_INCREF(attrib);
261 self->extra->attrib = attrib;
262
263 self->extra->length = 0;
264 self->extra->allocated = STATIC_CHILDREN;
265 self->extra->children = self->extra->_children;
266
267 return 0;
268}
269
270LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200271dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000272{
Eli Bendersky08b85292012-04-04 15:55:07 +0300273 ElementObjectExtra *myextra;
274 int i;
275
Eli Benderskyebf37a22012-04-03 22:02:37 +0300276 if (!self->extra)
277 return;
278
279 /* Avoid DECREFs calling into this code again (cycles, etc.)
280 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300281 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300282 self->extra = NULL;
283
284 Py_DECREF(myextra->attrib);
285
Eli Benderskyebf37a22012-04-03 22:02:37 +0300286 for (i = 0; i < myextra->length; i++)
287 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288
Eli Benderskyebf37a22012-04-03 22:02:37 +0300289 if (myextra->children != myextra->_children)
290 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000291
Eli Benderskyebf37a22012-04-03 22:02:37 +0300292 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000293}
294
Eli Bendersky092af1f2012-03-04 07:14:03 +0200295/* Convenience internal function to create new Element objects with the given
296 * tag and attributes.
297*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000298LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200299create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000300{
301 ElementObject* self;
302
Eli Bendersky0192ba32012-03-30 16:38:33 +0300303 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000304 if (self == NULL)
305 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000306 self->extra = NULL;
307
Eli Bendersky48d358b2012-05-30 17:57:50 +0300308 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200309 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000310 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000312 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000313 }
314
315 Py_INCREF(tag);
316 self->tag = tag;
317
318 Py_INCREF(Py_None);
319 self->text = Py_None;
320
321 Py_INCREF(Py_None);
322 self->tail = Py_None;
323
Eli Benderskyebf37a22012-04-03 22:02:37 +0300324 self->weakreflist = NULL;
325
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000326 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300327 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000328 return (PyObject*) self;
329}
330
Eli Bendersky092af1f2012-03-04 07:14:03 +0200331static PyObject *
332element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
333{
334 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
335 if (e != NULL) {
336 Py_INCREF(Py_None);
337 e->tag = Py_None;
338
339 Py_INCREF(Py_None);
340 e->text = Py_None;
341
342 Py_INCREF(Py_None);
343 e->tail = Py_None;
344
345 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300346 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200347 }
348 return (PyObject *)e;
349}
350
Eli Bendersky737b1732012-05-29 06:02:56 +0300351/* Helper function for extracting the attrib dictionary from a keywords dict.
352 * This is required by some constructors/functions in this module that can
353 * either accept attrib as a keyword argument or all attributes splashed
354 * directly into *kwds.
355 * If there is no 'attrib' keyword, return an empty dict.
356 */
357static PyObject*
358get_attrib_from_keywords(PyObject *kwds)
359{
360 PyObject *attrib_str = PyUnicode_FromString("attrib");
361 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
362
363 if (attrib) {
364 /* If attrib was found in kwds, copy its value and remove it from
365 * kwds
366 */
367 if (!PyDict_Check(attrib)) {
368 Py_DECREF(attrib_str);
369 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
370 Py_TYPE(attrib)->tp_name);
371 return NULL;
372 }
373 attrib = PyDict_Copy(attrib);
374 PyDict_DelItem(kwds, attrib_str);
375 } else {
376 attrib = PyDict_New();
377 }
378
379 Py_DECREF(attrib_str);
380
381 if (attrib)
382 PyDict_Update(attrib, kwds);
383 return attrib;
384}
385
Eli Bendersky092af1f2012-03-04 07:14:03 +0200386static int
387element_init(PyObject *self, PyObject *args, PyObject *kwds)
388{
389 PyObject *tag;
390 PyObject *tmp;
391 PyObject *attrib = NULL;
392 ElementObject *self_elem;
393
394 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
395 return -1;
396
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (attrib) {
398 /* attrib passed as positional arg */
399 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 if (!attrib)
401 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300402 if (kwds) {
403 if (PyDict_Update(attrib, kwds) < 0) {
404 return -1;
405 }
406 }
407 } else if (kwds) {
408 /* have keywords args */
409 attrib = get_attrib_from_keywords(kwds);
410 if (!attrib)
411 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200412 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300413 /* no attrib arg, no kwds, so no attributes */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200414 Py_INCREF(Py_None);
415 attrib = Py_None;
416 }
417
418 self_elem = (ElementObject *)self;
419
Eli Bendersky48d358b2012-05-30 17:57:50 +0300420 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 if (create_extra(self_elem, attrib) < 0) {
422 PyObject_Del(self_elem);
423 return -1;
424 }
425 }
426
Eli Bendersky48d358b2012-05-30 17:57:50 +0300427 /* We own a reference to attrib here and it's no longer needed. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200428 Py_DECREF(attrib);
429
430 /* Replace the objects already pointed to by tag, text and tail. */
431 tmp = self_elem->tag;
432 self_elem->tag = tag;
433 Py_INCREF(tag);
434 Py_DECREF(tmp);
435
436 tmp = self_elem->text;
437 self_elem->text = Py_None;
438 Py_INCREF(Py_None);
439 Py_DECREF(JOIN_OBJ(tmp));
440
441 tmp = self_elem->tail;
442 self_elem->tail = Py_None;
443 Py_INCREF(Py_None);
444 Py_DECREF(JOIN_OBJ(tmp));
445
446 return 0;
447}
448
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000449LOCAL(int)
450element_resize(ElementObject* self, int extra)
451{
452 int size;
453 PyObject* *children;
454
455 /* make sure self->children can hold the given number of extra
456 elements. set an exception and return -1 if allocation failed */
457
458 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200459 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000460
461 size = self->extra->length + extra;
462
463 if (size > self->extra->allocated) {
464 /* use Python 2.4's list growth strategy */
465 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000466 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100467 * which needs at least 4 bytes.
468 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 * be safe.
470 */
471 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000472 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000473 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100474 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000475 * false alarm always assume at least one child to be safe.
476 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000477 children = PyObject_Realloc(self->extra->children,
478 size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 } else {
482 children = PyObject_Malloc(size * sizeof(PyObject*));
483 if (!children)
484 goto nomemory;
485 /* copy existing children from static area to malloc buffer */
486 memcpy(children, self->extra->children,
487 self->extra->length * sizeof(PyObject*));
488 }
489 self->extra->children = children;
490 self->extra->allocated = size;
491 }
492
493 return 0;
494
495 nomemory:
496 PyErr_NoMemory();
497 return -1;
498}
499
500LOCAL(int)
501element_add_subelement(ElementObject* self, PyObject* element)
502{
503 /* add a child element to a parent */
504
505 if (element_resize(self, 1) < 0)
506 return -1;
507
508 Py_INCREF(element);
509 self->extra->children[self->extra->length] = element;
510
511 self->extra->length++;
512
513 return 0;
514}
515
516LOCAL(PyObject*)
517element_get_attrib(ElementObject* self)
518{
519 /* return borrowed reference to attrib dictionary */
520 /* note: this function assumes that the extra section exists */
521
522 PyObject* res = self->extra->attrib;
523
524 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000525 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000526 /* create missing dictionary */
527 res = PyDict_New();
528 if (!res)
529 return NULL;
530 self->extra->attrib = res;
531 }
532
533 return res;
534}
535
536LOCAL(PyObject*)
537element_get_text(ElementObject* self)
538{
539 /* return borrowed reference to text attribute */
540
541 PyObject* res = self->text;
542
543 if (JOIN_GET(res)) {
544 res = JOIN_OBJ(res);
545 if (PyList_CheckExact(res)) {
546 res = list_join(res);
547 if (!res)
548 return NULL;
549 self->text = res;
550 }
551 }
552
553 return res;
554}
555
556LOCAL(PyObject*)
557element_get_tail(ElementObject* self)
558{
559 /* return borrowed reference to text attribute */
560
561 PyObject* res = self->tail;
562
563 if (JOIN_GET(res)) {
564 res = JOIN_OBJ(res);
565 if (PyList_CheckExact(res)) {
566 res = list_join(res);
567 if (!res)
568 return NULL;
569 self->tail = res;
570 }
571 }
572
573 return res;
574}
575
576static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300577subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000578{
579 PyObject* elem;
580
581 ElementObject* parent;
582 PyObject* tag;
583 PyObject* attrib = NULL;
584 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
585 &Element_Type, &parent, &tag,
586 &PyDict_Type, &attrib))
587 return NULL;
588
Eli Bendersky737b1732012-05-29 06:02:56 +0300589 if (attrib) {
590 /* attrib passed as positional arg */
591 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000592 if (!attrib)
593 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300594 if (kwds) {
595 if (PyDict_Update(attrib, kwds) < 0) {
596 return NULL;
597 }
598 }
599 } else if (kwds) {
600 /* have keyword args */
601 attrib = get_attrib_from_keywords(kwds);
602 if (!attrib)
603 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000604 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300605 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000606 Py_INCREF(Py_None);
607 attrib = Py_None;
608 }
609
Eli Bendersky092af1f2012-03-04 07:14:03 +0200610 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000611
612 Py_DECREF(attrib);
613
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000614 if (element_add_subelement(parent, elem) < 0) {
615 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000616 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000617 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000618
619 return elem;
620}
621
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622static int
623element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
624{
625 Py_VISIT(self->tag);
626 Py_VISIT(JOIN_OBJ(self->text));
627 Py_VISIT(JOIN_OBJ(self->tail));
628
629 if (self->extra) {
630 int i;
631 Py_VISIT(self->extra->attrib);
632
633 for (i = 0; i < self->extra->length; ++i)
634 Py_VISIT(self->extra->children[i]);
635 }
636 return 0;
637}
638
639static int
640element_gc_clear(ElementObject *self)
641{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300642 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300643
644 /* The following is like Py_CLEAR for self->text and self->tail, but
645 * written explicitily because the real pointers hide behind access
646 * macros.
647 */
648 if (self->text) {
649 PyObject *tmp = JOIN_OBJ(self->text);
650 self->text = NULL;
651 Py_DECREF(tmp);
652 }
653
654 if (self->tail) {
655 PyObject *tmp = JOIN_OBJ(self->tail);
656 self->tail = NULL;
657 Py_DECREF(tmp);
658 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659
660 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300661 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300662 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300663 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300664 return 0;
665}
666
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000667static void
668element_dealloc(ElementObject* self)
669{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300670 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300671
672 if (self->weakreflist != NULL)
673 PyObject_ClearWeakRefs((PyObject *) self);
674
Eli Bendersky0192ba32012-03-30 16:38:33 +0300675 /* element_gc_clear clears all references and deallocates extra
676 */
677 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000678
679 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200680 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000681}
682
683/* -------------------------------------------------------------------- */
684/* methods (in alphabetical order) */
685
686static PyObject*
687element_append(ElementObject* self, PyObject* args)
688{
689 PyObject* element;
690 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
691 return NULL;
692
693 if (element_add_subelement(self, element) < 0)
694 return NULL;
695
696 Py_RETURN_NONE;
697}
698
699static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300700element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701{
702 if (!PyArg_ParseTuple(args, ":clear"))
703 return NULL;
704
Eli Benderskyebf37a22012-04-03 22:02:37 +0300705 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000706
707 Py_INCREF(Py_None);
708 Py_DECREF(JOIN_OBJ(self->text));
709 self->text = Py_None;
710
711 Py_INCREF(Py_None);
712 Py_DECREF(JOIN_OBJ(self->tail));
713 self->tail = Py_None;
714
715 Py_RETURN_NONE;
716}
717
718static PyObject*
719element_copy(ElementObject* self, PyObject* args)
720{
721 int i;
722 ElementObject* element;
723
724 if (!PyArg_ParseTuple(args, ":__copy__"))
725 return NULL;
726
Eli Bendersky092af1f2012-03-04 07:14:03 +0200727 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000728 self->tag, (self->extra) ? self->extra->attrib : Py_None
729 );
730 if (!element)
731 return NULL;
732
733 Py_DECREF(JOIN_OBJ(element->text));
734 element->text = self->text;
735 Py_INCREF(JOIN_OBJ(element->text));
736
737 Py_DECREF(JOIN_OBJ(element->tail));
738 element->tail = self->tail;
739 Py_INCREF(JOIN_OBJ(element->tail));
740
741 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100742
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000743 if (element_resize(element, self->extra->length) < 0) {
744 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000746 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000747
748 for (i = 0; i < self->extra->length; i++) {
749 Py_INCREF(self->extra->children[i]);
750 element->extra->children[i] = self->extra->children[i];
751 }
752
753 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100754
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755 }
756
757 return (PyObject*) element;
758}
759
760static PyObject*
761element_deepcopy(ElementObject* self, PyObject* args)
762{
763 int i;
764 ElementObject* element;
765 PyObject* tag;
766 PyObject* attrib;
767 PyObject* text;
768 PyObject* tail;
769 PyObject* id;
770
771 PyObject* memo;
772 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
773 return NULL;
774
775 tag = deepcopy(self->tag, memo);
776 if (!tag)
777 return NULL;
778
779 if (self->extra) {
780 attrib = deepcopy(self->extra->attrib, memo);
781 if (!attrib) {
782 Py_DECREF(tag);
783 return NULL;
784 }
785 } else {
786 Py_INCREF(Py_None);
787 attrib = Py_None;
788 }
789
Eli Bendersky092af1f2012-03-04 07:14:03 +0200790 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000791
792 Py_DECREF(tag);
793 Py_DECREF(attrib);
794
795 if (!element)
796 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100797
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000798 text = deepcopy(JOIN_OBJ(self->text), memo);
799 if (!text)
800 goto error;
801 Py_DECREF(element->text);
802 element->text = JOIN_SET(text, JOIN_GET(self->text));
803
804 tail = deepcopy(JOIN_OBJ(self->tail), memo);
805 if (!tail)
806 goto error;
807 Py_DECREF(element->tail);
808 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
809
810 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100811
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000812 if (element_resize(element, self->extra->length) < 0)
813 goto error;
814
815 for (i = 0; i < self->extra->length; i++) {
816 PyObject* child = deepcopy(self->extra->children[i], memo);
817 if (!child) {
818 element->extra->length = i;
819 goto error;
820 }
821 element->extra->children[i] = child;
822 }
823
824 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100825
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826 }
827
828 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000829 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000830 if (!id)
831 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000832
833 i = PyDict_SetItem(memo, id, (PyObject*) element);
834
835 Py_DECREF(id);
836
837 if (i < 0)
838 goto error;
839
840 return (PyObject*) element;
841
842 error:
843 Py_DECREF(element);
844 return NULL;
845}
846
847LOCAL(int)
848checkpath(PyObject* tag)
849{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000850 Py_ssize_t i;
851 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000852
853 /* check if a tag contains an xpath character */
854
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000855#define PATHCHAR(ch) \
856 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000857
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000858 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200859 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
860 void *data = PyUnicode_DATA(tag);
861 unsigned int kind = PyUnicode_KIND(tag);
862 for (i = 0; i < len; i++) {
863 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
864 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000865 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200866 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000867 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200868 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000869 return 1;
870 }
871 return 0;
872 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000873 if (PyBytes_Check(tag)) {
874 char *p = PyBytes_AS_STRING(tag);
875 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000876 if (p[i] == '{')
877 check = 0;
878 else if (p[i] == '}')
879 check = 1;
880 else if (check && PATHCHAR(p[i]))
881 return 1;
882 }
883 return 0;
884 }
885
886 return 1; /* unknown type; might be path expression */
887}
888
889static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000890element_extend(ElementObject* self, PyObject* args)
891{
892 PyObject* seq;
893 Py_ssize_t i, seqlen = 0;
894
895 PyObject* seq_in;
896 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
897 return NULL;
898
899 seq = PySequence_Fast(seq_in, "");
900 if (!seq) {
901 PyErr_Format(
902 PyExc_TypeError,
903 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
904 );
905 return NULL;
906 }
907
908 seqlen = PySequence_Size(seq);
909 for (i = 0; i < seqlen; i++) {
910 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +0200911 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
912 Py_DECREF(seq);
913 PyErr_Format(
914 PyExc_TypeError,
915 "expected an Element, not \"%.200s\"",
916 Py_TYPE(element)->tp_name);
917 return NULL;
918 }
919
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000920 if (element_add_subelement(self, element) < 0) {
921 Py_DECREF(seq);
922 return NULL;
923 }
924 }
925
926 Py_DECREF(seq);
927
928 Py_RETURN_NONE;
929}
930
931static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300932element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000933{
934 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000935 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000936 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +0300937 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200938
Eli Bendersky737b1732012-05-29 06:02:56 +0300939 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
940 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000941 return NULL;
942
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200943 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200944 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200945 return _PyObject_CallMethodId(
946 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000947 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200948 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000949
950 if (!self->extra)
951 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100952
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000953 for (i = 0; i < self->extra->length; i++) {
954 PyObject* item = self->extra->children[i];
955 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000956 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000957 Py_INCREF(item);
958 return item;
959 }
960 }
961
962 Py_RETURN_NONE;
963}
964
965static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300966element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000967{
968 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000969 PyObject* tag;
970 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000971 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200972 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +0300973 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200974
Eli Bendersky737b1732012-05-29 06:02:56 +0300975 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
976 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000977 return NULL;
978
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000979 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200980 return _PyObject_CallMethodId(
981 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000982 );
983
984 if (!self->extra) {
985 Py_INCREF(default_value);
986 return default_value;
987 }
988
989 for (i = 0; i < self->extra->length; i++) {
990 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000991 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
992
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000993 PyObject* text = element_get_text(item);
994 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000995 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000996 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000997 return text;
998 }
999 }
1000
1001 Py_INCREF(default_value);
1002 return default_value;
1003}
1004
1005static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001006element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001007{
1008 int i;
1009 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001010 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001011 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001012 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001013
Eli Bendersky737b1732012-05-29 06:02:56 +03001014 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1015 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001016 return NULL;
1017
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001018 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001019 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001020 return _PyObject_CallMethodId(
1021 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001022 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001023 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001024
1025 out = PyList_New(0);
1026 if (!out)
1027 return NULL;
1028
1029 if (!self->extra)
1030 return out;
1031
1032 for (i = 0; i < self->extra->length; i++) {
1033 PyObject* item = self->extra->children[i];
1034 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001035 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001036 if (PyList_Append(out, item) < 0) {
1037 Py_DECREF(out);
1038 return NULL;
1039 }
1040 }
1041 }
1042
1043 return out;
1044}
1045
1046static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001047element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001048{
1049 PyObject* tag;
1050 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001051 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001052 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001053
Eli Bendersky737b1732012-05-29 06:02:56 +03001054 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1055 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001056 return NULL;
1057
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001058 return _PyObject_CallMethodId(
1059 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001060 );
1061}
1062
1063static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001064element_get(ElementObject* self, PyObject* args)
1065{
1066 PyObject* value;
1067
1068 PyObject* key;
1069 PyObject* default_value = Py_None;
1070 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
1071 return NULL;
1072
1073 if (!self->extra || self->extra->attrib == Py_None)
1074 value = default_value;
1075 else {
1076 value = PyDict_GetItem(self->extra->attrib, key);
1077 if (!value)
1078 value = default_value;
1079 }
1080
1081 Py_INCREF(value);
1082 return value;
1083}
1084
1085static PyObject*
1086element_getchildren(ElementObject* self, PyObject* args)
1087{
1088 int i;
1089 PyObject* list;
1090
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001091 /* FIXME: report as deprecated? */
1092
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001093 if (!PyArg_ParseTuple(args, ":getchildren"))
1094 return NULL;
1095
1096 if (!self->extra)
1097 return PyList_New(0);
1098
1099 list = PyList_New(self->extra->length);
1100 if (!list)
1101 return NULL;
1102
1103 for (i = 0; i < self->extra->length; i++) {
1104 PyObject* item = self->extra->children[i];
1105 Py_INCREF(item);
1106 PyList_SET_ITEM(list, i, item);
1107 }
1108
1109 return list;
1110}
1111
1112static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001113element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001114{
1115 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001116
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001117 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001118 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001119 return NULL;
1120
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001121 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001122 PyErr_SetString(
1123 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001125 );
1126 return NULL;
1127 }
1128
1129 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001130 if (!args)
1131 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001132
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001133 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1134 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1135
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001136 result = PyObject_CallObject(elementtree_iter_obj, args);
1137
1138 Py_DECREF(args);
1139
1140 return result;
1141}
1142
1143
1144static PyObject*
1145element_itertext(ElementObject* self, PyObject* args)
1146{
1147 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001148
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001149 if (!PyArg_ParseTuple(args, ":itertext"))
1150 return NULL;
1151
1152 if (!elementtree_itertext_obj) {
1153 PyErr_SetString(
1154 PyExc_RuntimeError,
1155 "itertext helper not found"
1156 );
1157 return NULL;
1158 }
1159
1160 args = PyTuple_New(1);
1161 if (!args)
1162 return NULL;
1163
1164 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1165
1166 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001167
1168 Py_DECREF(args);
1169
1170 return result;
1171}
1172
1173static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001174element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001175{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001176 ElementObject* self = (ElementObject*) self_;
1177
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178 if (!self->extra || index < 0 || index >= self->extra->length) {
1179 PyErr_SetString(
1180 PyExc_IndexError,
1181 "child index out of range"
1182 );
1183 return NULL;
1184 }
1185
1186 Py_INCREF(self->extra->children[index]);
1187 return self->extra->children[index];
1188}
1189
1190static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001191element_insert(ElementObject* self, PyObject* args)
1192{
1193 int i;
1194
1195 int index;
1196 PyObject* element;
1197 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1198 &Element_Type, &element))
1199 return NULL;
1200
1201 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001202 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001203
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001204 if (index < 0) {
1205 index += self->extra->length;
1206 if (index < 0)
1207 index = 0;
1208 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001209 if (index > self->extra->length)
1210 index = self->extra->length;
1211
1212 if (element_resize(self, 1) < 0)
1213 return NULL;
1214
1215 for (i = self->extra->length; i > index; i--)
1216 self->extra->children[i] = self->extra->children[i-1];
1217
1218 Py_INCREF(element);
1219 self->extra->children[index] = element;
1220
1221 self->extra->length++;
1222
1223 Py_RETURN_NONE;
1224}
1225
1226static PyObject*
1227element_items(ElementObject* self, PyObject* args)
1228{
1229 if (!PyArg_ParseTuple(args, ":items"))
1230 return NULL;
1231
1232 if (!self->extra || self->extra->attrib == Py_None)
1233 return PyList_New(0);
1234
1235 return PyDict_Items(self->extra->attrib);
1236}
1237
1238static PyObject*
1239element_keys(ElementObject* self, PyObject* args)
1240{
1241 if (!PyArg_ParseTuple(args, ":keys"))
1242 return NULL;
1243
1244 if (!self->extra || self->extra->attrib == Py_None)
1245 return PyList_New(0);
1246
1247 return PyDict_Keys(self->extra->attrib);
1248}
1249
Martin v. Löwis18e16552006-02-15 17:27:45 +00001250static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001251element_length(ElementObject* self)
1252{
1253 if (!self->extra)
1254 return 0;
1255
1256 return self->extra->length;
1257}
1258
1259static PyObject*
1260element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1261{
1262 PyObject* elem;
1263
1264 PyObject* tag;
1265 PyObject* attrib;
1266 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1267 return NULL;
1268
1269 attrib = PyDict_Copy(attrib);
1270 if (!attrib)
1271 return NULL;
1272
Eli Bendersky092af1f2012-03-04 07:14:03 +02001273 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001274
1275 Py_DECREF(attrib);
1276
1277 return elem;
1278}
1279
1280static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001281element_remove(ElementObject* self, PyObject* args)
1282{
1283 int i;
1284
1285 PyObject* element;
1286 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1287 return NULL;
1288
1289 if (!self->extra) {
1290 /* element has no children, so raise exception */
1291 PyErr_SetString(
1292 PyExc_ValueError,
1293 "list.remove(x): x not in list"
1294 );
1295 return NULL;
1296 }
1297
1298 for (i = 0; i < self->extra->length; i++) {
1299 if (self->extra->children[i] == element)
1300 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001301 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001302 break;
1303 }
1304
1305 if (i == self->extra->length) {
1306 /* element is not in children, so raise exception */
1307 PyErr_SetString(
1308 PyExc_ValueError,
1309 "list.remove(x): x not in list"
1310 );
1311 return NULL;
1312 }
1313
1314 Py_DECREF(self->extra->children[i]);
1315
1316 self->extra->length--;
1317
1318 for (; i < self->extra->length; i++)
1319 self->extra->children[i] = self->extra->children[i+1];
1320
1321 Py_RETURN_NONE;
1322}
1323
1324static PyObject*
1325element_repr(ElementObject* self)
1326{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001327 if (self->tag)
1328 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1329 else
1330 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001331}
1332
1333static PyObject*
1334element_set(ElementObject* self, PyObject* args)
1335{
1336 PyObject* attrib;
1337
1338 PyObject* key;
1339 PyObject* value;
1340 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1341 return NULL;
1342
1343 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001344 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001345
1346 attrib = element_get_attrib(self);
1347 if (!attrib)
1348 return NULL;
1349
1350 if (PyDict_SetItem(attrib, key, value) < 0)
1351 return NULL;
1352
1353 Py_RETURN_NONE;
1354}
1355
1356static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001357element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001358{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001359 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001360 int i;
1361 PyObject* old;
1362
1363 if (!self->extra || index < 0 || index >= self->extra->length) {
1364 PyErr_SetString(
1365 PyExc_IndexError,
1366 "child assignment index out of range");
1367 return -1;
1368 }
1369
1370 old = self->extra->children[index];
1371
1372 if (item) {
1373 Py_INCREF(item);
1374 self->extra->children[index] = item;
1375 } else {
1376 self->extra->length--;
1377 for (i = index; i < self->extra->length; i++)
1378 self->extra->children[i] = self->extra->children[i+1];
1379 }
1380
1381 Py_DECREF(old);
1382
1383 return 0;
1384}
1385
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001386static PyObject*
1387element_subscr(PyObject* self_, PyObject* item)
1388{
1389 ElementObject* self = (ElementObject*) self_;
1390
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001391 if (PyIndex_Check(item)) {
1392 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001393
1394 if (i == -1 && PyErr_Occurred()) {
1395 return NULL;
1396 }
1397 if (i < 0 && self->extra)
1398 i += self->extra->length;
1399 return element_getitem(self_, i);
1400 }
1401 else if (PySlice_Check(item)) {
1402 Py_ssize_t start, stop, step, slicelen, cur, i;
1403 PyObject* list;
1404
1405 if (!self->extra)
1406 return PyList_New(0);
1407
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001408 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001409 self->extra->length,
1410 &start, &stop, &step, &slicelen) < 0) {
1411 return NULL;
1412 }
1413
1414 if (slicelen <= 0)
1415 return PyList_New(0);
1416 else {
1417 list = PyList_New(slicelen);
1418 if (!list)
1419 return NULL;
1420
1421 for (cur = start, i = 0; i < slicelen;
1422 cur += step, i++) {
1423 PyObject* item = self->extra->children[cur];
1424 Py_INCREF(item);
1425 PyList_SET_ITEM(list, i, item);
1426 }
1427
1428 return list;
1429 }
1430 }
1431 else {
1432 PyErr_SetString(PyExc_TypeError,
1433 "element indices must be integers");
1434 return NULL;
1435 }
1436}
1437
1438static int
1439element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1440{
1441 ElementObject* self = (ElementObject*) self_;
1442
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001443 if (PyIndex_Check(item)) {
1444 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001445
1446 if (i == -1 && PyErr_Occurred()) {
1447 return -1;
1448 }
1449 if (i < 0 && self->extra)
1450 i += self->extra->length;
1451 return element_setitem(self_, i, value);
1452 }
1453 else if (PySlice_Check(item)) {
1454 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1455
1456 PyObject* recycle = NULL;
1457 PyObject* seq = NULL;
1458
1459 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001460 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001461
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001462 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001463 self->extra->length,
1464 &start, &stop, &step, &slicelen) < 0) {
1465 return -1;
1466 }
1467
Eli Bendersky865756a2012-03-09 13:38:15 +02001468 if (value == NULL) {
1469 /* Delete slice */
1470 size_t cur;
1471 Py_ssize_t i;
1472
1473 if (slicelen <= 0)
1474 return 0;
1475
1476 /* Since we're deleting, the direction of the range doesn't matter,
1477 * so for simplicity make it always ascending.
1478 */
1479 if (step < 0) {
1480 stop = start + 1;
1481 start = stop + step * (slicelen - 1) - 1;
1482 step = -step;
1483 }
1484
1485 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1486
1487 /* recycle is a list that will contain all the children
1488 * scheduled for removal.
1489 */
1490 if (!(recycle = PyList_New(slicelen))) {
1491 PyErr_NoMemory();
1492 return -1;
1493 }
1494
1495 /* This loop walks over all the children that have to be deleted,
1496 * with cur pointing at them. num_moved is the amount of children
1497 * until the next deleted child that have to be "shifted down" to
1498 * occupy the deleted's places.
1499 * Note that in the ith iteration, shifting is done i+i places down
1500 * because i children were already removed.
1501 */
1502 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1503 /* Compute how many children have to be moved, clipping at the
1504 * list end.
1505 */
1506 Py_ssize_t num_moved = step - 1;
1507 if (cur + step >= (size_t)self->extra->length) {
1508 num_moved = self->extra->length - cur - 1;
1509 }
1510
1511 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1512
1513 memmove(
1514 self->extra->children + cur - i,
1515 self->extra->children + cur + 1,
1516 num_moved * sizeof(PyObject *));
1517 }
1518
1519 /* Leftover "tail" after the last removed child */
1520 cur = start + (size_t)slicelen * step;
1521 if (cur < (size_t)self->extra->length) {
1522 memmove(
1523 self->extra->children + cur - slicelen,
1524 self->extra->children + cur,
1525 (self->extra->length - cur) * sizeof(PyObject *));
1526 }
1527
1528 self->extra->length -= slicelen;
1529
1530 /* Discard the recycle list with all the deleted sub-elements */
1531 Py_XDECREF(recycle);
1532 return 0;
1533 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001534 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001535 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001536 seq = PySequence_Fast(value, "");
1537 if (!seq) {
1538 PyErr_Format(
1539 PyExc_TypeError,
1540 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1541 );
1542 return -1;
1543 }
1544 newlen = PySequence_Size(seq);
1545 }
1546
1547 if (step != 1 && newlen != slicelen)
1548 {
1549 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001550 "attempt to assign sequence of size %zd "
1551 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001552 newlen, slicelen
1553 );
1554 return -1;
1555 }
1556
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001557 /* Resize before creating the recycle bin, to prevent refleaks. */
1558 if (newlen > slicelen) {
1559 if (element_resize(self, newlen - slicelen) < 0) {
1560 if (seq) {
1561 Py_DECREF(seq);
1562 }
1563 return -1;
1564 }
1565 }
1566
1567 if (slicelen > 0) {
1568 /* to avoid recursive calls to this method (via decref), move
1569 old items to the recycle bin here, and get rid of them when
1570 we're done modifying the element */
1571 recycle = PyList_New(slicelen);
1572 if (!recycle) {
1573 if (seq) {
1574 Py_DECREF(seq);
1575 }
1576 return -1;
1577 }
1578 for (cur = start, i = 0; i < slicelen;
1579 cur += step, i++)
1580 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1581 }
1582
1583 if (newlen < slicelen) {
1584 /* delete slice */
1585 for (i = stop; i < self->extra->length; i++)
1586 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1587 } else if (newlen > slicelen) {
1588 /* insert slice */
1589 for (i = self->extra->length-1; i >= stop; i--)
1590 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1591 }
1592
1593 /* replace the slice */
1594 for (cur = start, i = 0; i < newlen;
1595 cur += step, i++) {
1596 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1597 Py_INCREF(element);
1598 self->extra->children[cur] = element;
1599 }
1600
1601 self->extra->length += newlen - slicelen;
1602
1603 if (seq) {
1604 Py_DECREF(seq);
1605 }
1606
1607 /* discard the recycle bin, and everything in it */
1608 Py_XDECREF(recycle);
1609
1610 return 0;
1611 }
1612 else {
1613 PyErr_SetString(PyExc_TypeError,
1614 "element indices must be integers");
1615 return -1;
1616 }
1617}
1618
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001619static PyMethodDef element_methods[] = {
1620
Eli Bendersky0192ba32012-03-30 16:38:33 +03001621 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001622
1623 {"get", (PyCFunction) element_get, METH_VARARGS},
1624 {"set", (PyCFunction) element_set, METH_VARARGS},
1625
Eli Bendersky737b1732012-05-29 06:02:56 +03001626 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1627 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1628 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001629
1630 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001631 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001632 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1633 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1634
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001635 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1636 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001637 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001638
1639 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001640 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1641
1642 {"items", (PyCFunction) element_items, METH_VARARGS},
1643 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1644
1645 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1646
1647 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1648 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1649
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001650 {NULL, NULL}
1651};
1652
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001653static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001654element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001655{
1656 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001657 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001658
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001659 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001660 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001661
Alexander Belopolskye239d232010-12-08 23:31:48 +00001662 if (name == NULL)
1663 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001664
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001665 /* handle common attributes first */
1666 if (strcmp(name, "tag") == 0) {
1667 res = self->tag;
1668 Py_INCREF(res);
1669 return res;
1670 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001671 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001672 Py_INCREF(res);
1673 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001674 }
1675
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001676 /* methods */
1677 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1678 if (res)
1679 return res;
1680
1681 /* less common attributes */
1682 if (strcmp(name, "tail") == 0) {
1683 PyErr_Clear();
1684 res = element_get_tail(self);
1685 } else if (strcmp(name, "attrib") == 0) {
1686 PyErr_Clear();
1687 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001688 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001689 res = element_get_attrib(self);
1690 }
1691
1692 if (!res)
1693 return NULL;
1694
1695 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001696 return res;
1697}
1698
Eli Benderskyb20df952012-05-20 06:33:29 +03001699static PyObject*
1700element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001701{
Eli Benderskyb20df952012-05-20 06:33:29 +03001702 char *name = "";
1703 if (PyUnicode_Check(nameobj))
1704 name = _PyUnicode_AsString(nameobj);
1705
1706 if (name == NULL)
1707 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001708
1709 if (strcmp(name, "tag") == 0) {
1710 Py_DECREF(self->tag);
1711 self->tag = value;
1712 Py_INCREF(self->tag);
1713 } else if (strcmp(name, "text") == 0) {
1714 Py_DECREF(JOIN_OBJ(self->text));
1715 self->text = value;
1716 Py_INCREF(self->text);
1717 } else if (strcmp(name, "tail") == 0) {
1718 Py_DECREF(JOIN_OBJ(self->tail));
1719 self->tail = value;
1720 Py_INCREF(self->tail);
1721 } else if (strcmp(name, "attrib") == 0) {
1722 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001723 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001724 Py_DECREF(self->extra->attrib);
1725 self->extra->attrib = value;
1726 Py_INCREF(self->extra->attrib);
1727 } else {
1728 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001729 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001730 }
1731
Eli Benderskyb20df952012-05-20 06:33:29 +03001732 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001733}
1734
1735static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001736 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001737 0, /* sq_concat */
1738 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001739 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001740 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001741 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001742 0,
1743};
1744
1745static PyMappingMethods element_as_mapping = {
1746 (lenfunc) element_length,
1747 (binaryfunc) element_subscr,
1748 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001749};
1750
Neal Norwitz227b5332006-03-22 09:28:35 +00001751static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001752 PyVarObject_HEAD_INIT(NULL, 0)
1753 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001754 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001755 (destructor)element_dealloc, /* tp_dealloc */
1756 0, /* tp_print */
1757 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001758 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001759 0, /* tp_reserved */
1760 (reprfunc)element_repr, /* tp_repr */
1761 0, /* tp_as_number */
1762 &element_as_sequence, /* tp_as_sequence */
1763 &element_as_mapping, /* tp_as_mapping */
1764 0, /* tp_hash */
1765 0, /* tp_call */
1766 0, /* tp_str */
1767 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001768 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001769 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001770 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1771 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001772 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001773 (traverseproc)element_gc_traverse, /* tp_traverse */
1774 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001775 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001776 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001777 0, /* tp_iter */
1778 0, /* tp_iternext */
1779 element_methods, /* tp_methods */
1780 0, /* tp_members */
1781 0, /* tp_getset */
1782 0, /* tp_base */
1783 0, /* tp_dict */
1784 0, /* tp_descr_get */
1785 0, /* tp_descr_set */
1786 0, /* tp_dictoffset */
1787 (initproc)element_init, /* tp_init */
1788 PyType_GenericAlloc, /* tp_alloc */
1789 element_new, /* tp_new */
1790 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001791};
1792
1793/* ==================================================================== */
1794/* the tree builder type */
1795
1796typedef struct {
1797 PyObject_HEAD
1798
Eli Bendersky58d548d2012-05-29 15:45:16 +03001799 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001800
Eli Bendersky58d548d2012-05-29 15:45:16 +03001801 ElementObject *this; /* current node */
1802 ElementObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001803
Eli Bendersky58d548d2012-05-29 15:45:16 +03001804 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001805
Eli Bendersky58d548d2012-05-29 15:45:16 +03001806 PyObject *stack; /* element stack */
1807 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001808
Eli Bendersky48d358b2012-05-30 17:57:50 +03001809 PyObject *element_factory;
1810
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001811 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03001812 PyObject *events; /* list of events, or NULL if not collecting */
1813 PyObject *start_event_obj; /* event objects (NULL to ignore) */
1814 PyObject *end_event_obj;
1815 PyObject *start_ns_event_obj;
1816 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001817} TreeBuilderObject;
1818
Neal Norwitz227b5332006-03-22 09:28:35 +00001819static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001820
Christian Heimes90aa7642007-12-19 02:45:37 +00001821#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001822
1823/* -------------------------------------------------------------------- */
1824/* constructor and destructor */
1825
Eli Bendersky58d548d2012-05-29 15:45:16 +03001826static PyObject *
1827treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001828{
Eli Bendersky58d548d2012-05-29 15:45:16 +03001829 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
1830 if (t != NULL) {
1831 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001832
Eli Bendersky58d548d2012-05-29 15:45:16 +03001833 Py_INCREF(Py_None);
1834 t->this = (ElementObject *)Py_None;
1835 Py_INCREF(Py_None);
1836 t->last = (ElementObject *)Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001837
Eli Bendersky58d548d2012-05-29 15:45:16 +03001838 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03001839 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03001840 t->stack = PyList_New(20);
1841 if (!t->stack) {
1842 Py_DECREF(t->this);
1843 Py_DECREF(t->last);
1844 return NULL;
1845 }
1846 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001847
Eli Bendersky58d548d2012-05-29 15:45:16 +03001848 t->events = NULL;
1849 t->start_event_obj = t->end_event_obj = NULL;
1850 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
1851 }
1852 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001853}
1854
Eli Bendersky58d548d2012-05-29 15:45:16 +03001855static int
1856treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001857{
Eli Bendersky48d358b2012-05-30 17:57:50 +03001858 static char *kwlist[] = {"element_factory", NULL};
1859 PyObject *element_factory = NULL;
1860 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
1861
1862 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
1863 &element_factory)) {
1864 return -1;
1865 }
1866
1867 if (element_factory) {
1868 Py_INCREF(element_factory);
1869 Py_XDECREF(self_tb->element_factory);
1870 self_tb->element_factory = element_factory;
1871 }
1872
Eli Bendersky58d548d2012-05-29 15:45:16 +03001873 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001874}
1875
Eli Bendersky48d358b2012-05-30 17:57:50 +03001876static int
1877treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
1878{
1879 Py_VISIT(self->root);
1880 Py_VISIT(self->this);
1881 Py_VISIT(self->last);
1882 Py_VISIT(self->data);
1883 Py_VISIT(self->stack);
1884 Py_VISIT(self->element_factory);
1885 return 0;
1886}
1887
1888static int
1889treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001890{
1891 Py_XDECREF(self->end_ns_event_obj);
1892 Py_XDECREF(self->start_ns_event_obj);
1893 Py_XDECREF(self->end_event_obj);
1894 Py_XDECREF(self->start_event_obj);
1895 Py_XDECREF(self->events);
1896 Py_DECREF(self->stack);
1897 Py_XDECREF(self->data);
1898 Py_DECREF(self->last);
1899 Py_DECREF(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03001900 Py_CLEAR(self->element_factory);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001901 Py_XDECREF(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03001902 return 0;
1903}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001904
Eli Bendersky48d358b2012-05-30 17:57:50 +03001905static void
1906treebuilder_dealloc(TreeBuilderObject *self)
1907{
1908 PyObject_GC_UnTrack(self);
1909 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03001910 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001911}
1912
1913/* -------------------------------------------------------------------- */
1914/* handlers */
1915
1916LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001917treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1918 PyObject* attrib)
1919{
1920 PyObject* node;
1921 PyObject* this;
1922
1923 if (self->data) {
1924 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001925 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001926 self->last->text = JOIN_SET(
1927 self->data, PyList_CheckExact(self->data)
1928 );
1929 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001930 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001931 self->last->tail = JOIN_SET(
1932 self->data, PyList_CheckExact(self->data)
1933 );
1934 }
1935 self->data = NULL;
1936 }
1937
Eli Bendersky48d358b2012-05-30 17:57:50 +03001938 if (self->element_factory) {
1939 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
1940 } else {
1941 node = create_new_element(tag, attrib);
1942 }
1943 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001944 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03001945 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001946
1947 this = (PyObject*) self->this;
1948
1949 if (this != Py_None) {
1950 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001951 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001952 } else {
1953 if (self->root) {
1954 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001955 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001956 "multiple elements on top level"
1957 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001958 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001959 }
1960 Py_INCREF(node);
1961 self->root = node;
1962 }
1963
1964 if (self->index < PyList_GET_SIZE(self->stack)) {
1965 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001966 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001967 Py_INCREF(this);
1968 } else {
1969 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001970 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001971 }
1972 self->index++;
1973
1974 Py_DECREF(this);
1975 Py_INCREF(node);
1976 self->this = (ElementObject*) node;
1977
1978 Py_DECREF(self->last);
1979 Py_INCREF(node);
1980 self->last = (ElementObject*) node;
1981
1982 if (self->start_event_obj) {
1983 PyObject* res;
1984 PyObject* action = self->start_event_obj;
1985 res = PyTuple_New(2);
1986 if (res) {
1987 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1988 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1989 PyList_Append(self->events, res);
1990 Py_DECREF(res);
1991 } else
1992 PyErr_Clear(); /* FIXME: propagate error */
1993 }
1994
1995 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001996
1997 error:
1998 Py_DECREF(node);
1999 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002000}
2001
2002LOCAL(PyObject*)
2003treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2004{
2005 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002006 if (self->last == (ElementObject*) Py_None) {
2007 /* ignore calls to data before the first call to start */
2008 Py_RETURN_NONE;
2009 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002010 /* store the first item as is */
2011 Py_INCREF(data); self->data = data;
2012 } else {
2013 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002014 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2015 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002016 /* expat often generates single character data sections; handle
2017 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002018 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2019 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002020 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002021 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002022 } else if (PyList_CheckExact(self->data)) {
2023 if (PyList_Append(self->data, data) < 0)
2024 return NULL;
2025 } else {
2026 PyObject* list = PyList_New(2);
2027 if (!list)
2028 return NULL;
2029 PyList_SET_ITEM(list, 0, self->data);
2030 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2031 self->data = list;
2032 }
2033 }
2034
2035 Py_RETURN_NONE;
2036}
2037
2038LOCAL(PyObject*)
2039treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2040{
2041 PyObject* item;
2042
2043 if (self->data) {
2044 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002045 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002046 self->last->text = JOIN_SET(
2047 self->data, PyList_CheckExact(self->data)
2048 );
2049 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002050 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002051 self->last->tail = JOIN_SET(
2052 self->data, PyList_CheckExact(self->data)
2053 );
2054 }
2055 self->data = NULL;
2056 }
2057
2058 if (self->index == 0) {
2059 PyErr_SetString(
2060 PyExc_IndexError,
2061 "pop from empty stack"
2062 );
2063 return NULL;
2064 }
2065
2066 self->index--;
2067
2068 item = PyList_GET_ITEM(self->stack, self->index);
2069 Py_INCREF(item);
2070
2071 Py_DECREF(self->last);
2072
2073 self->last = (ElementObject*) self->this;
2074 self->this = (ElementObject*) item;
2075
2076 if (self->end_event_obj) {
2077 PyObject* res;
2078 PyObject* action = self->end_event_obj;
2079 PyObject* node = (PyObject*) self->last;
2080 res = PyTuple_New(2);
2081 if (res) {
2082 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
2083 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
2084 PyList_Append(self->events, res);
2085 Py_DECREF(res);
2086 } else
2087 PyErr_Clear(); /* FIXME: propagate error */
2088 }
2089
2090 Py_INCREF(self->last);
2091 return (PyObject*) self->last;
2092}
2093
2094LOCAL(void)
2095treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002096 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002097{
2098 PyObject* res;
2099 PyObject* action;
2100 PyObject* parcel;
2101
2102 if (!self->events)
2103 return;
2104
2105 if (start) {
2106 if (!self->start_ns_event_obj)
2107 return;
2108 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002109 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002110 if (!parcel)
2111 return;
2112 Py_INCREF(action);
2113 } else {
2114 if (!self->end_ns_event_obj)
2115 return;
2116 action = self->end_ns_event_obj;
2117 Py_INCREF(action);
2118 parcel = Py_None;
2119 Py_INCREF(parcel);
2120 }
2121
2122 res = PyTuple_New(2);
2123
2124 if (res) {
2125 PyTuple_SET_ITEM(res, 0, action);
2126 PyTuple_SET_ITEM(res, 1, parcel);
2127 PyList_Append(self->events, res);
2128 Py_DECREF(res);
2129 } else
2130 PyErr_Clear(); /* FIXME: propagate error */
2131}
2132
2133/* -------------------------------------------------------------------- */
2134/* methods (in alphabetical order) */
2135
2136static PyObject*
2137treebuilder_data(TreeBuilderObject* self, PyObject* args)
2138{
2139 PyObject* data;
2140 if (!PyArg_ParseTuple(args, "O:data", &data))
2141 return NULL;
2142
2143 return treebuilder_handle_data(self, data);
2144}
2145
2146static PyObject*
2147treebuilder_end(TreeBuilderObject* self, PyObject* args)
2148{
2149 PyObject* tag;
2150 if (!PyArg_ParseTuple(args, "O:end", &tag))
2151 return NULL;
2152
2153 return treebuilder_handle_end(self, tag);
2154}
2155
2156LOCAL(PyObject*)
2157treebuilder_done(TreeBuilderObject* self)
2158{
2159 PyObject* res;
2160
2161 /* FIXME: check stack size? */
2162
2163 if (self->root)
2164 res = self->root;
2165 else
2166 res = Py_None;
2167
2168 Py_INCREF(res);
2169 return res;
2170}
2171
2172static PyObject*
2173treebuilder_close(TreeBuilderObject* self, PyObject* args)
2174{
2175 if (!PyArg_ParseTuple(args, ":close"))
2176 return NULL;
2177
2178 return treebuilder_done(self);
2179}
2180
2181static PyObject*
2182treebuilder_start(TreeBuilderObject* self, PyObject* args)
2183{
2184 PyObject* tag;
2185 PyObject* attrib = Py_None;
2186 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2187 return NULL;
2188
2189 return treebuilder_handle_start(self, tag, attrib);
2190}
2191
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002192static PyMethodDef treebuilder_methods[] = {
2193 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2194 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2195 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002196 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2197 {NULL, NULL}
2198};
2199
Neal Norwitz227b5332006-03-22 09:28:35 +00002200static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002201 PyVarObject_HEAD_INIT(NULL, 0)
2202 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002203 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002204 (destructor)treebuilder_dealloc, /* tp_dealloc */
2205 0, /* tp_print */
2206 0, /* tp_getattr */
2207 0, /* tp_setattr */
2208 0, /* tp_reserved */
2209 0, /* tp_repr */
2210 0, /* tp_as_number */
2211 0, /* tp_as_sequence */
2212 0, /* tp_as_mapping */
2213 0, /* tp_hash */
2214 0, /* tp_call */
2215 0, /* tp_str */
2216 0, /* tp_getattro */
2217 0, /* tp_setattro */
2218 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002219 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2220 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002221 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002222 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2223 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002224 0, /* tp_richcompare */
2225 0, /* tp_weaklistoffset */
2226 0, /* tp_iter */
2227 0, /* tp_iternext */
2228 treebuilder_methods, /* tp_methods */
2229 0, /* tp_members */
2230 0, /* tp_getset */
2231 0, /* tp_base */
2232 0, /* tp_dict */
2233 0, /* tp_descr_get */
2234 0, /* tp_descr_set */
2235 0, /* tp_dictoffset */
2236 (initproc)treebuilder_init, /* tp_init */
2237 PyType_GenericAlloc, /* tp_alloc */
2238 treebuilder_new, /* tp_new */
2239 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002240};
2241
2242/* ==================================================================== */
2243/* the expat interface */
2244
2245#if defined(USE_EXPAT)
2246
2247#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002248#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002249static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002250#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002251
Eli Bendersky52467b12012-06-01 07:13:08 +03002252static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2253 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2254
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002255typedef struct {
2256 PyObject_HEAD
2257
2258 XML_Parser parser;
2259
2260 PyObject* target;
2261 PyObject* entity;
2262
2263 PyObject* names;
2264
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002265 PyObject* handle_start;
2266 PyObject* handle_data;
2267 PyObject* handle_end;
2268
2269 PyObject* handle_comment;
2270 PyObject* handle_pi;
2271
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002272 PyObject* handle_close;
2273
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002274} XMLParserObject;
2275
Neal Norwitz227b5332006-03-22 09:28:35 +00002276static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002277
2278/* helpers */
2279
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002280LOCAL(PyObject*)
2281makeuniversal(XMLParserObject* self, const char* string)
2282{
2283 /* convert a UTF-8 tag/attribute name from the expat parser
2284 to a universal name string */
2285
2286 int size = strlen(string);
2287 PyObject* key;
2288 PyObject* value;
2289
2290 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002291 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002292 if (!key)
2293 return NULL;
2294
2295 value = PyDict_GetItem(self->names, key);
2296
2297 if (value) {
2298 Py_INCREF(value);
2299 } else {
2300 /* new name. convert to universal name, and decode as
2301 necessary */
2302
2303 PyObject* tag;
2304 char* p;
2305 int i;
2306
2307 /* look for namespace separator */
2308 for (i = 0; i < size; i++)
2309 if (string[i] == '}')
2310 break;
2311 if (i != size) {
2312 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002313 tag = PyBytes_FromStringAndSize(NULL, size+1);
2314 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002315 p[0] = '{';
2316 memcpy(p+1, string, size);
2317 size++;
2318 } else {
2319 /* plain name; use key as tag */
2320 Py_INCREF(key);
2321 tag = key;
2322 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002323
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002324 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002325 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002326 value = PyUnicode_DecodeUTF8(p, size, "strict");
2327 Py_DECREF(tag);
2328 if (!value) {
2329 Py_DECREF(key);
2330 return NULL;
2331 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002332
2333 /* add to names dictionary */
2334 if (PyDict_SetItem(self->names, key, value) < 0) {
2335 Py_DECREF(key);
2336 Py_DECREF(value);
2337 return NULL;
2338 }
2339 }
2340
2341 Py_DECREF(key);
2342 return value;
2343}
2344
Eli Bendersky5b77d812012-03-16 08:20:05 +02002345/* Set the ParseError exception with the given parameters.
2346 * If message is not NULL, it's used as the error string. Otherwise, the
2347 * message string is the default for the given error_code.
2348*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002349static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002350expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002351{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002352 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002353
Victor Stinner499dfcf2011-03-21 13:26:24 +01002354 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002355 message ? message : EXPAT(ErrorString)(error_code),
2356 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002357 if (errmsg == NULL)
2358 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002359
Victor Stinner499dfcf2011-03-21 13:26:24 +01002360 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2361 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002362 if (!error)
2363 return;
2364
Eli Bendersky5b77d812012-03-16 08:20:05 +02002365 /* Add code and position attributes */
2366 code = PyLong_FromLong((long)error_code);
2367 if (!code) {
2368 Py_DECREF(error);
2369 return;
2370 }
2371 if (PyObject_SetAttrString(error, "code", code) == -1) {
2372 Py_DECREF(error);
2373 Py_DECREF(code);
2374 return;
2375 }
2376 Py_DECREF(code);
2377
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002378 position = Py_BuildValue("(ii)", line, column);
2379 if (!position) {
2380 Py_DECREF(error);
2381 return;
2382 }
2383 if (PyObject_SetAttrString(error, "position", position) == -1) {
2384 Py_DECREF(error);
2385 Py_DECREF(position);
2386 return;
2387 }
2388 Py_DECREF(position);
2389
2390 PyErr_SetObject(elementtree_parseerror_obj, error);
2391 Py_DECREF(error);
2392}
2393
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002394/* -------------------------------------------------------------------- */
2395/* handlers */
2396
2397static void
2398expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2399 int data_len)
2400{
2401 PyObject* key;
2402 PyObject* value;
2403 PyObject* res;
2404
2405 if (data_len < 2 || data_in[0] != '&')
2406 return;
2407
Neal Norwitz0269b912007-08-08 06:56:02 +00002408 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002409 if (!key)
2410 return;
2411
2412 value = PyDict_GetItem(self->entity, key);
2413
2414 if (value) {
2415 if (TreeBuilder_CheckExact(self->target))
2416 res = treebuilder_handle_data(
2417 (TreeBuilderObject*) self->target, value
2418 );
2419 else if (self->handle_data)
2420 res = PyObject_CallFunction(self->handle_data, "O", value);
2421 else
2422 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002423 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002424 } else if (!PyErr_Occurred()) {
2425 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002426 char message[128] = "undefined entity ";
2427 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002428 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002429 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002430 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002431 EXPAT(GetErrorColumnNumber)(self->parser),
2432 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002433 );
2434 }
2435
2436 Py_DECREF(key);
2437}
2438
2439static void
2440expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2441 const XML_Char **attrib_in)
2442{
2443 PyObject* res;
2444 PyObject* tag;
2445 PyObject* attrib;
2446 int ok;
2447
2448 /* tag name */
2449 tag = makeuniversal(self, tag_in);
2450 if (!tag)
2451 return; /* parser will look for errors */
2452
2453 /* attributes */
2454 if (attrib_in[0]) {
2455 attrib = PyDict_New();
2456 if (!attrib)
2457 return;
2458 while (attrib_in[0] && attrib_in[1]) {
2459 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002460 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002461 if (!key || !value) {
2462 Py_XDECREF(value);
2463 Py_XDECREF(key);
2464 Py_DECREF(attrib);
2465 return;
2466 }
2467 ok = PyDict_SetItem(attrib, key, value);
2468 Py_DECREF(value);
2469 Py_DECREF(key);
2470 if (ok < 0) {
2471 Py_DECREF(attrib);
2472 return;
2473 }
2474 attrib_in += 2;
2475 }
2476 } else {
2477 Py_INCREF(Py_None);
2478 attrib = Py_None;
2479 }
2480
Eli Bendersky48d358b2012-05-30 17:57:50 +03002481 /* If we get None, pass an empty dictionary on */
2482 if (attrib == Py_None) {
2483 Py_DECREF(attrib);
2484 attrib = PyDict_New();
2485 if (!attrib)
2486 return;
2487 }
2488
2489 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002490 /* shortcut */
2491 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2492 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002493 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002494 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002495 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002496 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002497 res = NULL;
2498
2499 Py_DECREF(tag);
2500 Py_DECREF(attrib);
2501
2502 Py_XDECREF(res);
2503}
2504
2505static void
2506expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2507 int data_len)
2508{
2509 PyObject* data;
2510 PyObject* res;
2511
Neal Norwitz0269b912007-08-08 06:56:02 +00002512 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002513 if (!data)
2514 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002515
2516 if (TreeBuilder_CheckExact(self->target))
2517 /* shortcut */
2518 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2519 else if (self->handle_data)
2520 res = PyObject_CallFunction(self->handle_data, "O", data);
2521 else
2522 res = NULL;
2523
2524 Py_DECREF(data);
2525
2526 Py_XDECREF(res);
2527}
2528
2529static void
2530expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2531{
2532 PyObject* tag;
2533 PyObject* res = NULL;
2534
2535 if (TreeBuilder_CheckExact(self->target))
2536 /* shortcut */
2537 /* the standard tree builder doesn't look at the end tag */
2538 res = treebuilder_handle_end(
2539 (TreeBuilderObject*) self->target, Py_None
2540 );
2541 else if (self->handle_end) {
2542 tag = makeuniversal(self, tag_in);
2543 if (tag) {
2544 res = PyObject_CallFunction(self->handle_end, "O", tag);
2545 Py_DECREF(tag);
2546 }
2547 }
2548
2549 Py_XDECREF(res);
2550}
2551
2552static void
2553expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2554 const XML_Char *uri)
2555{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002556 PyObject* sprefix = NULL;
2557 PyObject* suri = NULL;
2558
2559 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2560 if (!suri)
2561 return;
2562
2563 if (prefix)
2564 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2565 else
2566 sprefix = PyUnicode_FromString("");
2567 if (!sprefix) {
2568 Py_DECREF(suri);
2569 return;
2570 }
2571
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002572 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002573 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002574 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002575
2576 Py_DECREF(sprefix);
2577 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002578}
2579
2580static void
2581expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2582{
2583 treebuilder_handle_namespace(
2584 (TreeBuilderObject*) self->target, 0, NULL, NULL
2585 );
2586}
2587
2588static void
2589expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2590{
2591 PyObject* comment;
2592 PyObject* res;
2593
2594 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002595 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002596 if (comment) {
2597 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2598 Py_XDECREF(res);
2599 Py_DECREF(comment);
2600 }
2601 }
2602}
2603
2604static void
2605expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2606 const XML_Char* data_in)
2607{
2608 PyObject* target;
2609 PyObject* data;
2610 PyObject* res;
2611
2612 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002613 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2614 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002615 if (target && data) {
2616 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2617 Py_XDECREF(res);
2618 Py_DECREF(data);
2619 Py_DECREF(target);
2620 } else {
2621 Py_XDECREF(data);
2622 Py_XDECREF(target);
2623 }
2624 }
2625}
2626
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002627static int
2628expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2629 XML_Encoding *info)
2630{
2631 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002632 unsigned char s[256];
2633 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002634 void *data;
2635 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002636
2637 memset(info, 0, sizeof(XML_Encoding));
2638
2639 for (i = 0; i < 256; i++)
2640 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002641
Fredrik Lundhc3389992005-12-25 11:40:19 +00002642 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002643 if (!u)
2644 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002645 if (PyUnicode_READY(u))
2646 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002647
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002648 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002649 Py_DECREF(u);
2650 return XML_STATUS_ERROR;
2651 }
2652
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002653 kind = PyUnicode_KIND(u);
2654 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002655 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002656 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2657 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2658 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002659 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002660 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002661 }
2662
2663 Py_DECREF(u);
2664
2665 return XML_STATUS_OK;
2666}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002667
2668/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002669
Eli Bendersky52467b12012-06-01 07:13:08 +03002670static PyObject *
2671xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002672{
Eli Bendersky52467b12012-06-01 07:13:08 +03002673 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
2674 if (self) {
2675 self->parser = NULL;
2676 self->target = self->entity = self->names = NULL;
2677 self->handle_start = self->handle_data = self->handle_end = NULL;
2678 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002679 }
Eli Bendersky52467b12012-06-01 07:13:08 +03002680 return (PyObject *)self;
2681}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002682
Eli Bendersky52467b12012-06-01 07:13:08 +03002683static int
2684xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
2685{
2686 XMLParserObject *self_xp = (XMLParserObject *)self;
2687 PyObject *target = NULL, *html = NULL;
2688 char *encoding = NULL;
2689 static char *kwlist[] = {"html", "target", "encoding"};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002690
Eli Bendersky52467b12012-06-01 07:13:08 +03002691 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
2692 &html, &target, &encoding)) {
2693 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002694 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002695
Eli Bendersky52467b12012-06-01 07:13:08 +03002696 self_xp->entity = PyDict_New();
2697 if (!self_xp->entity)
2698 return -1;
2699
2700 self_xp->names = PyDict_New();
2701 if (!self_xp->names) {
2702 Py_XDECREF(self_xp->entity);
2703 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002704 }
2705
Eli Bendersky52467b12012-06-01 07:13:08 +03002706 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
2707 if (!self_xp->parser) {
2708 Py_XDECREF(self_xp->entity);
2709 Py_XDECREF(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03002711 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002712 }
2713
Eli Bendersky52467b12012-06-01 07:13:08 +03002714 if (target) {
2715 Py_INCREF(target);
2716 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03002717 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718 if (!target) {
Eli Bendersky52467b12012-06-01 07:13:08 +03002719 Py_XDECREF(self_xp->entity);
2720 Py_XDECREF(self_xp->names);
2721 EXPAT(ParserFree)(self_xp->parser);
2722 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002723 }
Eli Bendersky52467b12012-06-01 07:13:08 +03002724 }
2725 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002726
Eli Bendersky52467b12012-06-01 07:13:08 +03002727 self_xp->handle_start = PyObject_GetAttrString(target, "start");
2728 self_xp->handle_data = PyObject_GetAttrString(target, "data");
2729 self_xp->handle_end = PyObject_GetAttrString(target, "end");
2730 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
2731 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
2732 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002733
2734 PyErr_Clear();
Eli Bendersky52467b12012-06-01 07:13:08 +03002735
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03002737 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002739 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002740 (XML_StartElementHandler) expat_start_handler,
2741 (XML_EndElementHandler) expat_end_handler
2742 );
2743 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002744 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002745 (XML_DefaultHandler) expat_default_handler
2746 );
2747 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002748 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002749 (XML_CharacterDataHandler) expat_data_handler
2750 );
Eli Bendersky52467b12012-06-01 07:13:08 +03002751 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002752 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002753 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002754 (XML_CommentHandler) expat_comment_handler
2755 );
Eli Bendersky52467b12012-06-01 07:13:08 +03002756 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002757 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002758 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002759 (XML_ProcessingInstructionHandler) expat_pi_handler
2760 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002761 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002762 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002763 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2764 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002765
Eli Bendersky52467b12012-06-01 07:13:08 +03002766 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002767}
2768
Eli Bendersky52467b12012-06-01 07:13:08 +03002769static int
2770xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
2771{
2772 Py_VISIT(self->handle_close);
2773 Py_VISIT(self->handle_pi);
2774 Py_VISIT(self->handle_comment);
2775 Py_VISIT(self->handle_end);
2776 Py_VISIT(self->handle_data);
2777 Py_VISIT(self->handle_start);
2778
2779 Py_VISIT(self->target);
2780 Py_VISIT(self->entity);
2781 Py_VISIT(self->names);
2782
2783 return 0;
2784}
2785
2786static int
2787xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002788{
2789 EXPAT(ParserFree)(self->parser);
2790
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002791 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002792 Py_XDECREF(self->handle_pi);
2793 Py_XDECREF(self->handle_comment);
2794 Py_XDECREF(self->handle_end);
2795 Py_XDECREF(self->handle_data);
2796 Py_XDECREF(self->handle_start);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002797
Eli Bendersky52467b12012-06-01 07:13:08 +03002798 Py_XDECREF(self->target);
2799 Py_XDECREF(self->entity);
2800 Py_XDECREF(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002801
Eli Bendersky52467b12012-06-01 07:13:08 +03002802 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002803}
2804
Eli Bendersky52467b12012-06-01 07:13:08 +03002805static void
2806xmlparser_dealloc(XMLParserObject* self)
2807{
2808 PyObject_GC_UnTrack(self);
2809 xmlparser_gc_clear(self);
2810 Py_TYPE(self)->tp_free((PyObject *)self);
2811}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002812
2813LOCAL(PyObject*)
2814expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2815{
2816 int ok;
2817
2818 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2819
2820 if (PyErr_Occurred())
2821 return NULL;
2822
2823 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002824 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002825 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002826 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002827 EXPAT(GetErrorColumnNumber)(self->parser),
2828 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002829 );
2830 return NULL;
2831 }
2832
2833 Py_RETURN_NONE;
2834}
2835
2836static PyObject*
2837xmlparser_close(XMLParserObject* self, PyObject* args)
2838{
2839 /* end feeding data to parser */
2840
2841 PyObject* res;
2842 if (!PyArg_ParseTuple(args, ":close"))
2843 return NULL;
2844
2845 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002846 if (!res)
2847 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002848
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002849 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002850 Py_DECREF(res);
2851 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002852 } if (self->handle_close) {
2853 Py_DECREF(res);
2854 return PyObject_CallFunction(self->handle_close, "");
2855 } else
2856 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002857}
2858
2859static PyObject*
2860xmlparser_feed(XMLParserObject* self, PyObject* args)
2861{
2862 /* feed data to parser */
2863
2864 char* data;
2865 int data_len;
2866 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2867 return NULL;
2868
2869 return expat_parse(self, data, data_len, 0);
2870}
2871
2872static PyObject*
2873xmlparser_parse(XMLParserObject* self, PyObject* args)
2874{
2875 /* (internal) parse until end of input stream */
2876
2877 PyObject* reader;
2878 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02002879 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002880 PyObject* res;
2881
2882 PyObject* fileobj;
2883 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2884 return NULL;
2885
2886 reader = PyObject_GetAttrString(fileobj, "read");
2887 if (!reader)
2888 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002889
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002890 /* read from open file object */
2891 for (;;) {
2892
2893 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2894
2895 if (!buffer) {
2896 /* read failed (e.g. due to KeyboardInterrupt) */
2897 Py_DECREF(reader);
2898 return NULL;
2899 }
2900
Eli Benderskyf996e772012-03-16 05:53:30 +02002901 if (PyUnicode_CheckExact(buffer)) {
2902 /* A unicode object is encoded into bytes using UTF-8 */
2903 if (PyUnicode_GET_SIZE(buffer) == 0) {
2904 Py_DECREF(buffer);
2905 break;
2906 }
2907 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
2908 if (!temp) {
2909 /* Propagate exception from PyUnicode_AsEncodedString */
2910 Py_DECREF(buffer);
2911 Py_DECREF(reader);
2912 return NULL;
2913 }
2914
2915 /* Here we no longer need the original buffer since it contains
2916 * unicode. Make it point to the encoded bytes object.
2917 */
2918 Py_DECREF(buffer);
2919 buffer = temp;
2920 }
2921 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002922 Py_DECREF(buffer);
2923 break;
2924 }
2925
2926 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002927 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002928 );
2929
2930 Py_DECREF(buffer);
2931
2932 if (!res) {
2933 Py_DECREF(reader);
2934 return NULL;
2935 }
2936 Py_DECREF(res);
2937
2938 }
2939
2940 Py_DECREF(reader);
2941
2942 res = expat_parse(self, "", 0, 1);
2943
2944 if (res && TreeBuilder_CheckExact(self->target)) {
2945 Py_DECREF(res);
2946 return treebuilder_done((TreeBuilderObject*) self->target);
2947 }
2948
2949 return res;
2950}
2951
2952static PyObject*
2953xmlparser_setevents(XMLParserObject* self, PyObject* args)
2954{
2955 /* activate element event reporting */
2956
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002957 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002958 TreeBuilderObject* target;
2959
2960 PyObject* events; /* event collector */
2961 PyObject* event_set = Py_None;
2962 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2963 &event_set))
2964 return NULL;
2965
2966 if (!TreeBuilder_CheckExact(self->target)) {
2967 PyErr_SetString(
2968 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01002969 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970 "targets"
2971 );
2972 return NULL;
2973 }
2974
2975 target = (TreeBuilderObject*) self->target;
2976
2977 Py_INCREF(events);
2978 Py_XDECREF(target->events);
2979 target->events = events;
2980
2981 /* clear out existing events */
2982 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2983 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2984 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2985 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2986
2987 if (event_set == Py_None) {
2988 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002989 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002990 Py_RETURN_NONE;
2991 }
2992
2993 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2994 goto error;
2995
2996 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2997 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2998 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002999 if (PyUnicode_Check(item)) {
3000 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003001 if (event == NULL)
3002 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003003 } else if (PyBytes_Check(item))
3004 event = PyBytes_AS_STRING(item);
3005 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003006 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003007 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008 if (strcmp(event, "start") == 0) {
3009 Py_INCREF(item);
3010 target->start_event_obj = item;
3011 } else if (strcmp(event, "end") == 0) {
3012 Py_INCREF(item);
3013 Py_XDECREF(target->end_event_obj);
3014 target->end_event_obj = item;
3015 } else if (strcmp(event, "start-ns") == 0) {
3016 Py_INCREF(item);
3017 Py_XDECREF(target->start_ns_event_obj);
3018 target->start_ns_event_obj = item;
3019 EXPAT(SetNamespaceDeclHandler)(
3020 self->parser,
3021 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3022 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3023 );
3024 } else if (strcmp(event, "end-ns") == 0) {
3025 Py_INCREF(item);
3026 Py_XDECREF(target->end_ns_event_obj);
3027 target->end_ns_event_obj = item;
3028 EXPAT(SetNamespaceDeclHandler)(
3029 self->parser,
3030 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3031 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3032 );
3033 } else {
3034 PyErr_Format(
3035 PyExc_ValueError,
3036 "unknown event '%s'", event
3037 );
3038 return NULL;
3039 }
3040 }
3041
3042 Py_RETURN_NONE;
3043
3044 error:
3045 PyErr_SetString(
3046 PyExc_TypeError,
3047 "invalid event tuple"
3048 );
3049 return NULL;
3050}
3051
3052static PyMethodDef xmlparser_methods[] = {
3053 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
3054 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3055 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3056 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
3057 {NULL, NULL}
3058};
3059
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003060static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003061xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003062{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003063 if (PyUnicode_Check(nameobj)) {
3064 PyObject* res;
3065 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3066 res = self->entity;
3067 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3068 res = self->target;
3069 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3070 return PyUnicode_FromFormat(
3071 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003072 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003073 }
3074 else
3075 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076
Alexander Belopolskye239d232010-12-08 23:31:48 +00003077 Py_INCREF(res);
3078 return res;
3079 }
3080 generic:
3081 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082}
3083
Neal Norwitz227b5332006-03-22 09:28:35 +00003084static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003085 PyVarObject_HEAD_INIT(NULL, 0)
3086 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003087 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003088 (destructor)xmlparser_dealloc, /* tp_dealloc */
3089 0, /* tp_print */
3090 0, /* tp_getattr */
3091 0, /* tp_setattr */
3092 0, /* tp_reserved */
3093 0, /* tp_repr */
3094 0, /* tp_as_number */
3095 0, /* tp_as_sequence */
3096 0, /* tp_as_mapping */
3097 0, /* tp_hash */
3098 0, /* tp_call */
3099 0, /* tp_str */
3100 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3101 0, /* tp_setattro */
3102 0, /* tp_as_buffer */
3103 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3104 /* tp_flags */
3105 0, /* tp_doc */
3106 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3107 (inquiry)xmlparser_gc_clear, /* tp_clear */
3108 0, /* tp_richcompare */
3109 0, /* tp_weaklistoffset */
3110 0, /* tp_iter */
3111 0, /* tp_iternext */
3112 xmlparser_methods, /* tp_methods */
3113 0, /* tp_members */
3114 0, /* tp_getset */
3115 0, /* tp_base */
3116 0, /* tp_dict */
3117 0, /* tp_descr_get */
3118 0, /* tp_descr_set */
3119 0, /* tp_dictoffset */
3120 (initproc)xmlparser_init, /* tp_init */
3121 PyType_GenericAlloc, /* tp_alloc */
3122 xmlparser_new, /* tp_new */
3123 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003124};
3125
3126#endif
3127
3128/* ==================================================================== */
3129/* python module interface */
3130
3131static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003132 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003133 {NULL, NULL}
3134};
3135
Martin v. Löwis1a214512008-06-11 05:26:20 +00003136
3137static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003138 PyModuleDef_HEAD_INIT,
3139 "_elementtree",
3140 NULL,
3141 -1,
3142 _functions,
3143 NULL,
3144 NULL,
3145 NULL,
3146 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003147};
3148
Neal Norwitzf6657e62006-12-28 04:47:50 +00003149PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003150PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003151{
Eli Bendersky828efde2012-04-05 05:40:58 +03003152 PyObject *m, *g, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003153 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003154
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003155 /* Initialize object types */
3156 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003157 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003158 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003159 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003160#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003161 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003162 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003163#endif
3164
Martin v. Löwis1a214512008-06-11 05:26:20 +00003165 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003166 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003167 return NULL;
3168
3169 /* The code below requires that the module gets already added
3170 to sys.modules. */
3171 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003172 _elementtreemodule.m_name,
3173 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003174
3175 /* python glue code */
3176
3177 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003178 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003179 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003180
3181 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
3182
3183 bootstrap = (
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003184 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003185 " if tag == '*':\n"
3186 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003187 " if tag is None or node.tag == tag:\n"
3188 " yield node\n"
3189 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003190 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003191 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003192
3193 "def itertext(node):\n" /* helper */
3194 " if node.text:\n"
3195 " yield node.text\n"
3196 " for e in node:\n"
3197 " for s in e.itertext():\n"
3198 " yield s\n"
3199 " if e.tail:\n"
3200 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003201
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003202 );
3203
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003204 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3205 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003206
Eli Bendersky828efde2012-04-05 05:40:58 +03003207 if (!(temp = PyImport_ImportModule("copy")))
3208 return NULL;
3209 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3210 Py_XDECREF(temp);
3211
3212 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3213 return NULL;
3214
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003215 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3216 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003217
Eli Bendersky20d41742012-06-01 09:48:37 +03003218 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003219 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3220 if (expat_capi) {
3221 /* check that it's usable */
3222 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3223 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3224 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3225 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003226 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003227 expat_capi = NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003228 }
3229 }
3230 if (!expat_capi) {
3231 PyErr_SetString(
3232 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
3233 );
3234 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003235 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003236
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003237 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003238 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003239 );
3240 Py_INCREF(elementtree_parseerror_obj);
3241 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3242
Eli Bendersky092af1f2012-03-04 07:14:03 +02003243 Py_INCREF((PyObject *)&Element_Type);
3244 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3245
Eli Bendersky58d548d2012-05-29 15:45:16 +03003246 Py_INCREF((PyObject *)&TreeBuilder_Type);
3247 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3248
Eli Bendersky52467b12012-06-01 07:13:08 +03003249#if defined(USE_EXPAT)
3250 Py_INCREF((PyObject *)&XMLParser_Type);
3251 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3252#endif
3253
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003254 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003255}