blob: f2a1e64b87276ba527d587dede29015fd449ea80 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000053#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054
55/* -------------------------------------------------------------------- */
56/* configuration */
57
58/* Leave defined to include the expat-based XMLParser type */
59#define USE_EXPAT
60
Florent Xiclunaf15351d2010-03-13 23:24:31 +000061/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000062/* #define USE_PYEXPAT_CAPI */
63
64/* An element can hold this many children without extra memory
65 allocations. */
66#define STATIC_CHILDREN 4
67
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
72
73/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010074 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000075 that the number of children should be an even number, at least on
76 32-bit platforms. */
77
78/* -------------------------------------------------------------------- */
79
/* Optional allocation tracing.  Change the condition below to 1 to keep a
   running byte count in `memory` and print it on every ALLOC/RELEASE;
   otherwise both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
90
/* LOCAL(type) introduces a file-private function returning `type`.  Under
   MSVC the function is additionally declared __inline and __fastcall. */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
97
/* The text and tail slots are tagged pointers: bit 0 holds a 'join' flag
   and the remaining bits are the object pointer.  Any use of text or tail
   as an object pointer must therefore be wrapped in JOIN_OBJ.  See the
   comments in the ElementObject definition for more info.

   JOIN_OBJ(p)       - strip the flag and yield the PyObject pointer
   JOIN_GET(p)       - yield the flag bit (0 or 1)
   JOIN_SET(p, flag) - yield p's object pointer with `flag` or-ed in */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t) 1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & (Py_uintptr_t) 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
105
106/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000107static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000108static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000109static PyObject* elementtree_iter_obj;
110static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000111static PyObject* elementpath_obj;
112
113/* helpers */
114
115LOCAL(PyObject*)
116deepcopy(PyObject* object, PyObject* memo)
117{
118 /* do a deep copy of the given object */
119
120 PyObject* args;
121 PyObject* result;
122
123 if (!elementtree_deepcopy_obj) {
124 PyErr_SetString(
125 PyExc_RuntimeError,
126 "deepcopy helper not found"
127 );
128 return NULL;
129 }
130
131 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000132 if (!args)
133 return NULL;
134
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
136 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
137
138 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
139
140 Py_DECREF(args);
141
142 return result;
143}
144
145LOCAL(PyObject*)
146list_join(PyObject* list)
147{
148 /* join list elements (destroying the list in the process) */
149
150 PyObject* joiner;
151 PyObject* function;
152 PyObject* args;
153 PyObject* result;
154
155 switch (PyList_GET_SIZE(list)) {
156 case 0:
157 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000158 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159 case 1:
160 result = PyList_GET_ITEM(list, 0);
161 Py_INCREF(result);
162 Py_DECREF(list);
163 return result;
164 }
165
166 /* two or more elements: slice out a suitable separator from the
167 first member, and use that to join the entire list */
168
169 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
170 if (!joiner)
171 return NULL;
172
173 function = PyObject_GetAttrString(joiner, "join");
174 if (!function) {
175 Py_DECREF(joiner);
176 return NULL;
177 }
178
179 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000180 if (!args)
181 return NULL;
182
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183 PyTuple_SET_ITEM(args, 0, list);
184
185 result = PyObject_CallObject(function, args);
186
187 Py_DECREF(args); /* also removes list */
188 Py_DECREF(function);
189 Py_DECREF(joiner);
190
191 return result;
192}
193
Eli Bendersky48d358b2012-05-30 17:57:50 +0300194/* Is the given object an empty dictionary?
195*/
196static int
197is_empty_dict(PyObject *obj)
198{
199 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
200}
201
202
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200204/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
206typedef struct {
207
208 /* attributes (a dictionary object), or None if no attributes */
209 PyObject* attrib;
210
211 /* child elements */
212 int length; /* actual number of items */
213 int allocated; /* allocated items */
214
215 /* this either points to _children or to a malloced buffer */
216 PyObject* *children;
217
218 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100219
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000220} ElementObjectExtra;
221
222typedef struct {
223 PyObject_HEAD
224
225 /* element tag (a string). */
226 PyObject* tag;
227
228 /* text before first child. note that this is a tagged pointer;
229 use JOIN_OBJ to get the object pointer. the join flag is used
230 to distinguish lists created by the tree builder from lists
231 assigned to the attribute by application code; the former
232 should be joined before being returned to the user, the latter
233 should be left intact. */
234 PyObject* text;
235
236 /* text after this element, in parent. note that this is a tagged
237 pointer; use JOIN_OBJ to get the object pointer. */
238 PyObject* tail;
239
240 ElementObjectExtra* extra;
241
Eli Benderskyebf37a22012-04-03 22:02:37 +0300242 PyObject *weakreflist; /* For tp_weaklistoffset */
243
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000244} ElementObject;
245
Neal Norwitz227b5332006-03-22 09:28:35 +0000246static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000247
Christian Heimes90aa7642007-12-19 02:45:37 +0000248#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249
250/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200251/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000252
253LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200254create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000255{
256 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
257 if (!self->extra)
258 return -1;
259
260 if (!attrib)
261 attrib = Py_None;
262
263 Py_INCREF(attrib);
264 self->extra->attrib = attrib;
265
266 self->extra->length = 0;
267 self->extra->allocated = STATIC_CHILDREN;
268 self->extra->children = self->extra->_children;
269
270 return 0;
271}
272
273LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200274dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275{
Eli Bendersky08b85292012-04-04 15:55:07 +0300276 ElementObjectExtra *myextra;
277 int i;
278
Eli Benderskyebf37a22012-04-03 22:02:37 +0300279 if (!self->extra)
280 return;
281
282 /* Avoid DECREFs calling into this code again (cycles, etc.)
283 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300284 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300285 self->extra = NULL;
286
287 Py_DECREF(myextra->attrib);
288
Eli Benderskyebf37a22012-04-03 22:02:37 +0300289 for (i = 0; i < myextra->length; i++)
290 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000291
Eli Benderskyebf37a22012-04-03 22:02:37 +0300292 if (myextra->children != myextra->_children)
293 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000294
Eli Benderskyebf37a22012-04-03 22:02:37 +0300295 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000296}
297
Eli Bendersky092af1f2012-03-04 07:14:03 +0200298/* Convenience internal function to create new Element objects with the given
299 * tag and attributes.
300*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000301LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200302create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000303{
304 ElementObject* self;
305
Eli Bendersky0192ba32012-03-30 16:38:33 +0300306 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000307 if (self == NULL)
308 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000309 self->extra = NULL;
310
Eli Bendersky48d358b2012-05-30 17:57:50 +0300311 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200312 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000313 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000314 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000315 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000316 }
317
318 Py_INCREF(tag);
319 self->tag = tag;
320
321 Py_INCREF(Py_None);
322 self->text = Py_None;
323
324 Py_INCREF(Py_None);
325 self->tail = Py_None;
326
Eli Benderskyebf37a22012-04-03 22:02:37 +0300327 self->weakreflist = NULL;
328
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000329 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300330 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000331 return (PyObject*) self;
332}
333
Eli Bendersky092af1f2012-03-04 07:14:03 +0200334static PyObject *
335element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
336{
337 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
338 if (e != NULL) {
339 Py_INCREF(Py_None);
340 e->tag = Py_None;
341
342 Py_INCREF(Py_None);
343 e->text = Py_None;
344
345 Py_INCREF(Py_None);
346 e->tail = Py_None;
347
348 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300349 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200350 }
351 return (PyObject *)e;
352}
353
Eli Bendersky737b1732012-05-29 06:02:56 +0300354/* Helper function for extracting the attrib dictionary from a keywords dict.
355 * This is required by some constructors/functions in this module that can
356 * either accept attrib as a keyword argument or all attributes splashed
357 * directly into *kwds.
358 * If there is no 'attrib' keyword, return an empty dict.
359 */
360static PyObject*
361get_attrib_from_keywords(PyObject *kwds)
362{
363 PyObject *attrib_str = PyUnicode_FromString("attrib");
364 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
365
366 if (attrib) {
367 /* If attrib was found in kwds, copy its value and remove it from
368 * kwds
369 */
370 if (!PyDict_Check(attrib)) {
371 Py_DECREF(attrib_str);
372 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
373 Py_TYPE(attrib)->tp_name);
374 return NULL;
375 }
376 attrib = PyDict_Copy(attrib);
377 PyDict_DelItem(kwds, attrib_str);
378 } else {
379 attrib = PyDict_New();
380 }
381
382 Py_DECREF(attrib_str);
383
384 if (attrib)
385 PyDict_Update(attrib, kwds);
386 return attrib;
387}
388
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389static int
390element_init(PyObject *self, PyObject *args, PyObject *kwds)
391{
392 PyObject *tag;
393 PyObject *tmp;
394 PyObject *attrib = NULL;
395 ElementObject *self_elem;
396
397 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
398 return -1;
399
Eli Bendersky737b1732012-05-29 06:02:56 +0300400 if (attrib) {
401 /* attrib passed as positional arg */
402 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 if (!attrib)
404 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300405 if (kwds) {
406 if (PyDict_Update(attrib, kwds) < 0) {
407 return -1;
408 }
409 }
410 } else if (kwds) {
411 /* have keywords args */
412 attrib = get_attrib_from_keywords(kwds);
413 if (!attrib)
414 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200415 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300416 /* no attrib arg, no kwds, so no attributes */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200417 Py_INCREF(Py_None);
418 attrib = Py_None;
419 }
420
421 self_elem = (ElementObject *)self;
422
Eli Bendersky48d358b2012-05-30 17:57:50 +0300423 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 if (create_extra(self_elem, attrib) < 0) {
425 PyObject_Del(self_elem);
426 return -1;
427 }
428 }
429
Eli Bendersky48d358b2012-05-30 17:57:50 +0300430 /* We own a reference to attrib here and it's no longer needed. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_DECREF(attrib);
432
433 /* Replace the objects already pointed to by tag, text and tail. */
434 tmp = self_elem->tag;
435 self_elem->tag = tag;
436 Py_INCREF(tag);
437 Py_DECREF(tmp);
438
439 tmp = self_elem->text;
440 self_elem->text = Py_None;
441 Py_INCREF(Py_None);
442 Py_DECREF(JOIN_OBJ(tmp));
443
444 tmp = self_elem->tail;
445 self_elem->tail = Py_None;
446 Py_INCREF(Py_None);
447 Py_DECREF(JOIN_OBJ(tmp));
448
449 return 0;
450}
451
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000452LOCAL(int)
453element_resize(ElementObject* self, int extra)
454{
455 int size;
456 PyObject* *children;
457
458 /* make sure self->children can hold the given number of extra
459 elements. set an exception and return -1 if allocation failed */
460
461 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200462 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000463
464 size = self->extra->length + extra;
465
466 if (size > self->extra->allocated) {
467 /* use Python 2.4's list growth strategy */
468 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100470 * which needs at least 4 bytes.
471 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000472 * be safe.
473 */
474 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000475 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000476 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100477 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000478 * false alarm always assume at least one child to be safe.
479 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000480 children = PyObject_Realloc(self->extra->children,
481 size * sizeof(PyObject*));
482 if (!children)
483 goto nomemory;
484 } else {
485 children = PyObject_Malloc(size * sizeof(PyObject*));
486 if (!children)
487 goto nomemory;
488 /* copy existing children from static area to malloc buffer */
489 memcpy(children, self->extra->children,
490 self->extra->length * sizeof(PyObject*));
491 }
492 self->extra->children = children;
493 self->extra->allocated = size;
494 }
495
496 return 0;
497
498 nomemory:
499 PyErr_NoMemory();
500 return -1;
501}
502
503LOCAL(int)
504element_add_subelement(ElementObject* self, PyObject* element)
505{
506 /* add a child element to a parent */
507
508 if (element_resize(self, 1) < 0)
509 return -1;
510
511 Py_INCREF(element);
512 self->extra->children[self->extra->length] = element;
513
514 self->extra->length++;
515
516 return 0;
517}
518
519LOCAL(PyObject*)
520element_get_attrib(ElementObject* self)
521{
522 /* return borrowed reference to attrib dictionary */
523 /* note: this function assumes that the extra section exists */
524
525 PyObject* res = self->extra->attrib;
526
527 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000528 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000529 /* create missing dictionary */
530 res = PyDict_New();
531 if (!res)
532 return NULL;
533 self->extra->attrib = res;
534 }
535
536 return res;
537}
538
539LOCAL(PyObject*)
540element_get_text(ElementObject* self)
541{
542 /* return borrowed reference to text attribute */
543
544 PyObject* res = self->text;
545
546 if (JOIN_GET(res)) {
547 res = JOIN_OBJ(res);
548 if (PyList_CheckExact(res)) {
549 res = list_join(res);
550 if (!res)
551 return NULL;
552 self->text = res;
553 }
554 }
555
556 return res;
557}
558
559LOCAL(PyObject*)
560element_get_tail(ElementObject* self)
561{
562 /* return borrowed reference to text attribute */
563
564 PyObject* res = self->tail;
565
566 if (JOIN_GET(res)) {
567 res = JOIN_OBJ(res);
568 if (PyList_CheckExact(res)) {
569 res = list_join(res);
570 if (!res)
571 return NULL;
572 self->tail = res;
573 }
574 }
575
576 return res;
577}
578
579static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300580subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000581{
582 PyObject* elem;
583
584 ElementObject* parent;
585 PyObject* tag;
586 PyObject* attrib = NULL;
587 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
588 &Element_Type, &parent, &tag,
589 &PyDict_Type, &attrib))
590 return NULL;
591
Eli Bendersky737b1732012-05-29 06:02:56 +0300592 if (attrib) {
593 /* attrib passed as positional arg */
594 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000595 if (!attrib)
596 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300597 if (kwds) {
598 if (PyDict_Update(attrib, kwds) < 0) {
599 return NULL;
600 }
601 }
602 } else if (kwds) {
603 /* have keyword args */
604 attrib = get_attrib_from_keywords(kwds);
605 if (!attrib)
606 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000607 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300608 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000609 Py_INCREF(Py_None);
610 attrib = Py_None;
611 }
612
Eli Bendersky092af1f2012-03-04 07:14:03 +0200613 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614
615 Py_DECREF(attrib);
616
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000617 if (element_add_subelement(parent, elem) < 0) {
618 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000619 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000620 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000621
622 return elem;
623}
624
Eli Bendersky0192ba32012-03-30 16:38:33 +0300625static int
626element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
627{
628 Py_VISIT(self->tag);
629 Py_VISIT(JOIN_OBJ(self->text));
630 Py_VISIT(JOIN_OBJ(self->tail));
631
632 if (self->extra) {
633 int i;
634 Py_VISIT(self->extra->attrib);
635
636 for (i = 0; i < self->extra->length; ++i)
637 Py_VISIT(self->extra->children[i]);
638 }
639 return 0;
640}
641
642static int
643element_gc_clear(ElementObject *self)
644{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300645 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300646
647 /* The following is like Py_CLEAR for self->text and self->tail, but
648 * written explicitily because the real pointers hide behind access
649 * macros.
650 */
651 if (self->text) {
652 PyObject *tmp = JOIN_OBJ(self->text);
653 self->text = NULL;
654 Py_DECREF(tmp);
655 }
656
657 if (self->tail) {
658 PyObject *tmp = JOIN_OBJ(self->tail);
659 self->tail = NULL;
660 Py_DECREF(tmp);
661 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300662
663 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300664 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300665 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300666 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300667 return 0;
668}
669
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000670static void
671element_dealloc(ElementObject* self)
672{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300673 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300674
675 if (self->weakreflist != NULL)
676 PyObject_ClearWeakRefs((PyObject *) self);
677
Eli Bendersky0192ba32012-03-30 16:38:33 +0300678 /* element_gc_clear clears all references and deallocates extra
679 */
680 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000681
682 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200683 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000684}
685
686/* -------------------------------------------------------------------- */
687/* methods (in alphabetical order) */
688
689static PyObject*
690element_append(ElementObject* self, PyObject* args)
691{
692 PyObject* element;
693 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
694 return NULL;
695
696 if (element_add_subelement(self, element) < 0)
697 return NULL;
698
699 Py_RETURN_NONE;
700}
701
702static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300703element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704{
705 if (!PyArg_ParseTuple(args, ":clear"))
706 return NULL;
707
Eli Benderskyebf37a22012-04-03 22:02:37 +0300708 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000709
710 Py_INCREF(Py_None);
711 Py_DECREF(JOIN_OBJ(self->text));
712 self->text = Py_None;
713
714 Py_INCREF(Py_None);
715 Py_DECREF(JOIN_OBJ(self->tail));
716 self->tail = Py_None;
717
718 Py_RETURN_NONE;
719}
720
721static PyObject*
722element_copy(ElementObject* self, PyObject* args)
723{
724 int i;
725 ElementObject* element;
726
727 if (!PyArg_ParseTuple(args, ":__copy__"))
728 return NULL;
729
Eli Bendersky092af1f2012-03-04 07:14:03 +0200730 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000731 self->tag, (self->extra) ? self->extra->attrib : Py_None
732 );
733 if (!element)
734 return NULL;
735
736 Py_DECREF(JOIN_OBJ(element->text));
737 element->text = self->text;
738 Py_INCREF(JOIN_OBJ(element->text));
739
740 Py_DECREF(JOIN_OBJ(element->tail));
741 element->tail = self->tail;
742 Py_INCREF(JOIN_OBJ(element->tail));
743
744 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100745
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000746 if (element_resize(element, self->extra->length) < 0) {
747 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000749 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750
751 for (i = 0; i < self->extra->length; i++) {
752 Py_INCREF(self->extra->children[i]);
753 element->extra->children[i] = self->extra->children[i];
754 }
755
756 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100757
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000758 }
759
760 return (PyObject*) element;
761}
762
763static PyObject*
764element_deepcopy(ElementObject* self, PyObject* args)
765{
766 int i;
767 ElementObject* element;
768 PyObject* tag;
769 PyObject* attrib;
770 PyObject* text;
771 PyObject* tail;
772 PyObject* id;
773
774 PyObject* memo;
775 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
776 return NULL;
777
778 tag = deepcopy(self->tag, memo);
779 if (!tag)
780 return NULL;
781
782 if (self->extra) {
783 attrib = deepcopy(self->extra->attrib, memo);
784 if (!attrib) {
785 Py_DECREF(tag);
786 return NULL;
787 }
788 } else {
789 Py_INCREF(Py_None);
790 attrib = Py_None;
791 }
792
Eli Bendersky092af1f2012-03-04 07:14:03 +0200793 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000794
795 Py_DECREF(tag);
796 Py_DECREF(attrib);
797
798 if (!element)
799 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100800
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000801 text = deepcopy(JOIN_OBJ(self->text), memo);
802 if (!text)
803 goto error;
804 Py_DECREF(element->text);
805 element->text = JOIN_SET(text, JOIN_GET(self->text));
806
807 tail = deepcopy(JOIN_OBJ(self->tail), memo);
808 if (!tail)
809 goto error;
810 Py_DECREF(element->tail);
811 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
812
813 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100814
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000815 if (element_resize(element, self->extra->length) < 0)
816 goto error;
817
818 for (i = 0; i < self->extra->length; i++) {
819 PyObject* child = deepcopy(self->extra->children[i], memo);
820 if (!child) {
821 element->extra->length = i;
822 goto error;
823 }
824 element->extra->children[i] = child;
825 }
826
827 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100828
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000829 }
830
831 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000832 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000833 if (!id)
834 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000835
836 i = PyDict_SetItem(memo, id, (PyObject*) element);
837
838 Py_DECREF(id);
839
840 if (i < 0)
841 goto error;
842
843 return (PyObject*) element;
844
845 error:
846 Py_DECREF(element);
847 return NULL;
848}
849
850LOCAL(int)
851checkpath(PyObject* tag)
852{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000853 Py_ssize_t i;
854 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000855
856 /* check if a tag contains an xpath character */
857
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000858#define PATHCHAR(ch) \
859 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000860
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000861 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200862 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
863 void *data = PyUnicode_DATA(tag);
864 unsigned int kind = PyUnicode_KIND(tag);
865 for (i = 0; i < len; i++) {
866 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
867 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000868 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200869 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000870 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200871 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000872 return 1;
873 }
874 return 0;
875 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000876 if (PyBytes_Check(tag)) {
877 char *p = PyBytes_AS_STRING(tag);
878 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000879 if (p[i] == '{')
880 check = 0;
881 else if (p[i] == '}')
882 check = 1;
883 else if (check && PATHCHAR(p[i]))
884 return 1;
885 }
886 return 0;
887 }
888
889 return 1; /* unknown type; might be path expression */
890}
891
892static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000893element_extend(ElementObject* self, PyObject* args)
894{
895 PyObject* seq;
896 Py_ssize_t i, seqlen = 0;
897
898 PyObject* seq_in;
899 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
900 return NULL;
901
902 seq = PySequence_Fast(seq_in, "");
903 if (!seq) {
904 PyErr_Format(
905 PyExc_TypeError,
906 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
907 );
908 return NULL;
909 }
910
911 seqlen = PySequence_Size(seq);
912 for (i = 0; i < seqlen; i++) {
913 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +0200914 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
915 Py_DECREF(seq);
916 PyErr_Format(
917 PyExc_TypeError,
918 "expected an Element, not \"%.200s\"",
919 Py_TYPE(element)->tp_name);
920 return NULL;
921 }
922
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000923 if (element_add_subelement(self, element) < 0) {
924 Py_DECREF(seq);
925 return NULL;
926 }
927 }
928
929 Py_DECREF(seq);
930
931 Py_RETURN_NONE;
932}
933
934static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300935element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000936{
937 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000938 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000939 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +0300940 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200941
Eli Bendersky737b1732012-05-29 06:02:56 +0300942 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
943 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000944 return NULL;
945
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200946 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200947 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200948 return _PyObject_CallMethodId(
949 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000950 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200951 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000952
953 if (!self->extra)
954 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100955
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000956 for (i = 0; i < self->extra->length; i++) {
957 PyObject* item = self->extra->children[i];
958 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000959 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000960 Py_INCREF(item);
961 return item;
962 }
963 }
964
965 Py_RETURN_NONE;
966}
967
968static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300969element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000970{
971 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000972 PyObject* tag;
973 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000974 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200975 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +0300976 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200977
Eli Bendersky737b1732012-05-29 06:02:56 +0300978 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
979 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000980 return NULL;
981
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000982 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200983 return _PyObject_CallMethodId(
984 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000985 );
986
987 if (!self->extra) {
988 Py_INCREF(default_value);
989 return default_value;
990 }
991
992 for (i = 0; i < self->extra->length; i++) {
993 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000994 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
995
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000996 PyObject* text = element_get_text(item);
997 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000998 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000999 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001000 return text;
1001 }
1002 }
1003
1004 Py_INCREF(default_value);
1005 return default_value;
1006}
1007
1008static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001009element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001010{
1011 int i;
1012 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001013 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001014 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001015 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001016
Eli Bendersky737b1732012-05-29 06:02:56 +03001017 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1018 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001019 return NULL;
1020
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001021 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001022 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001023 return _PyObject_CallMethodId(
1024 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001025 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001026 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001027
1028 out = PyList_New(0);
1029 if (!out)
1030 return NULL;
1031
1032 if (!self->extra)
1033 return out;
1034
1035 for (i = 0; i < self->extra->length; i++) {
1036 PyObject* item = self->extra->children[i];
1037 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001038 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001039 if (PyList_Append(out, item) < 0) {
1040 Py_DECREF(out);
1041 return NULL;
1042 }
1043 }
1044 }
1045
1046 return out;
1047}
1048
1049static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001050element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001051{
1052 PyObject* tag;
1053 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001054 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001055 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001056
Eli Bendersky737b1732012-05-29 06:02:56 +03001057 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1058 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001059 return NULL;
1060
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001061 return _PyObject_CallMethodId(
1062 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001063 );
1064}
1065
1066static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001067element_get(ElementObject* self, PyObject* args)
1068{
1069 PyObject* value;
1070
1071 PyObject* key;
1072 PyObject* default_value = Py_None;
1073 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
1074 return NULL;
1075
1076 if (!self->extra || self->extra->attrib == Py_None)
1077 value = default_value;
1078 else {
1079 value = PyDict_GetItem(self->extra->attrib, key);
1080 if (!value)
1081 value = default_value;
1082 }
1083
1084 Py_INCREF(value);
1085 return value;
1086}
1087
1088static PyObject*
1089element_getchildren(ElementObject* self, PyObject* args)
1090{
1091 int i;
1092 PyObject* list;
1093
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001094 /* FIXME: report as deprecated? */
1095
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001096 if (!PyArg_ParseTuple(args, ":getchildren"))
1097 return NULL;
1098
1099 if (!self->extra)
1100 return PyList_New(0);
1101
1102 list = PyList_New(self->extra->length);
1103 if (!list)
1104 return NULL;
1105
1106 for (i = 0; i < self->extra->length; i++) {
1107 PyObject* item = self->extra->children[i];
1108 Py_INCREF(item);
1109 PyList_SET_ITEM(list, i, item);
1110 }
1111
1112 return list;
1113}
1114
1115static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001116element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001117{
1118 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001119
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001120 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001121 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001122 return NULL;
1123
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001125 PyErr_SetString(
1126 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001127 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001128 );
1129 return NULL;
1130 }
1131
1132 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001133 if (!args)
1134 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001135
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001136 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1137 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1138
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001139 result = PyObject_CallObject(elementtree_iter_obj, args);
1140
1141 Py_DECREF(args);
1142
1143 return result;
1144}
1145
1146
1147static PyObject*
1148element_itertext(ElementObject* self, PyObject* args)
1149{
1150 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001151
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001152 if (!PyArg_ParseTuple(args, ":itertext"))
1153 return NULL;
1154
1155 if (!elementtree_itertext_obj) {
1156 PyErr_SetString(
1157 PyExc_RuntimeError,
1158 "itertext helper not found"
1159 );
1160 return NULL;
1161 }
1162
1163 args = PyTuple_New(1);
1164 if (!args)
1165 return NULL;
1166
1167 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1168
1169 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001170
1171 Py_DECREF(args);
1172
1173 return result;
1174}
1175
1176static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001177element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001179 ElementObject* self = (ElementObject*) self_;
1180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001181 if (!self->extra || index < 0 || index >= self->extra->length) {
1182 PyErr_SetString(
1183 PyExc_IndexError,
1184 "child index out of range"
1185 );
1186 return NULL;
1187 }
1188
1189 Py_INCREF(self->extra->children[index]);
1190 return self->extra->children[index];
1191}
1192
1193static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001194element_insert(ElementObject* self, PyObject* args)
1195{
1196 int i;
1197
1198 int index;
1199 PyObject* element;
1200 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1201 &Element_Type, &element))
1202 return NULL;
1203
1204 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001205 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001206
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001207 if (index < 0) {
1208 index += self->extra->length;
1209 if (index < 0)
1210 index = 0;
1211 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001212 if (index > self->extra->length)
1213 index = self->extra->length;
1214
1215 if (element_resize(self, 1) < 0)
1216 return NULL;
1217
1218 for (i = self->extra->length; i > index; i--)
1219 self->extra->children[i] = self->extra->children[i-1];
1220
1221 Py_INCREF(element);
1222 self->extra->children[index] = element;
1223
1224 self->extra->length++;
1225
1226 Py_RETURN_NONE;
1227}
1228
1229static PyObject*
1230element_items(ElementObject* self, PyObject* args)
1231{
1232 if (!PyArg_ParseTuple(args, ":items"))
1233 return NULL;
1234
1235 if (!self->extra || self->extra->attrib == Py_None)
1236 return PyList_New(0);
1237
1238 return PyDict_Items(self->extra->attrib);
1239}
1240
1241static PyObject*
1242element_keys(ElementObject* self, PyObject* args)
1243{
1244 if (!PyArg_ParseTuple(args, ":keys"))
1245 return NULL;
1246
1247 if (!self->extra || self->extra->attrib == Py_None)
1248 return PyList_New(0);
1249
1250 return PyDict_Keys(self->extra->attrib);
1251}
1252
Martin v. Löwis18e16552006-02-15 17:27:45 +00001253static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254element_length(ElementObject* self)
1255{
1256 if (!self->extra)
1257 return 0;
1258
1259 return self->extra->length;
1260}
1261
1262static PyObject*
1263element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1264{
1265 PyObject* elem;
1266
1267 PyObject* tag;
1268 PyObject* attrib;
1269 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1270 return NULL;
1271
1272 attrib = PyDict_Copy(attrib);
1273 if (!attrib)
1274 return NULL;
1275
Eli Bendersky092af1f2012-03-04 07:14:03 +02001276 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277
1278 Py_DECREF(attrib);
1279
1280 return elem;
1281}
1282
1283static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001284element_remove(ElementObject* self, PyObject* args)
1285{
1286 int i;
1287
1288 PyObject* element;
1289 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1290 return NULL;
1291
1292 if (!self->extra) {
1293 /* element has no children, so raise exception */
1294 PyErr_SetString(
1295 PyExc_ValueError,
1296 "list.remove(x): x not in list"
1297 );
1298 return NULL;
1299 }
1300
1301 for (i = 0; i < self->extra->length; i++) {
1302 if (self->extra->children[i] == element)
1303 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001304 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001305 break;
1306 }
1307
1308 if (i == self->extra->length) {
1309 /* element is not in children, so raise exception */
1310 PyErr_SetString(
1311 PyExc_ValueError,
1312 "list.remove(x): x not in list"
1313 );
1314 return NULL;
1315 }
1316
1317 Py_DECREF(self->extra->children[i]);
1318
1319 self->extra->length--;
1320
1321 for (; i < self->extra->length; i++)
1322 self->extra->children[i] = self->extra->children[i+1];
1323
1324 Py_RETURN_NONE;
1325}
1326
1327static PyObject*
1328element_repr(ElementObject* self)
1329{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001330 if (self->tag)
1331 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1332 else
1333 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001334}
1335
1336static PyObject*
1337element_set(ElementObject* self, PyObject* args)
1338{
1339 PyObject* attrib;
1340
1341 PyObject* key;
1342 PyObject* value;
1343 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1344 return NULL;
1345
1346 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001347 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001348
1349 attrib = element_get_attrib(self);
1350 if (!attrib)
1351 return NULL;
1352
1353 if (PyDict_SetItem(attrib, key, value) < 0)
1354 return NULL;
1355
1356 Py_RETURN_NONE;
1357}
1358
1359static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001360element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001361{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001362 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001363 int i;
1364 PyObject* old;
1365
1366 if (!self->extra || index < 0 || index >= self->extra->length) {
1367 PyErr_SetString(
1368 PyExc_IndexError,
1369 "child assignment index out of range");
1370 return -1;
1371 }
1372
1373 old = self->extra->children[index];
1374
1375 if (item) {
1376 Py_INCREF(item);
1377 self->extra->children[index] = item;
1378 } else {
1379 self->extra->length--;
1380 for (i = index; i < self->extra->length; i++)
1381 self->extra->children[i] = self->extra->children[i+1];
1382 }
1383
1384 Py_DECREF(old);
1385
1386 return 0;
1387}
1388
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001389static PyObject*
1390element_subscr(PyObject* self_, PyObject* item)
1391{
1392 ElementObject* self = (ElementObject*) self_;
1393
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001394 if (PyIndex_Check(item)) {
1395 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001396
1397 if (i == -1 && PyErr_Occurred()) {
1398 return NULL;
1399 }
1400 if (i < 0 && self->extra)
1401 i += self->extra->length;
1402 return element_getitem(self_, i);
1403 }
1404 else if (PySlice_Check(item)) {
1405 Py_ssize_t start, stop, step, slicelen, cur, i;
1406 PyObject* list;
1407
1408 if (!self->extra)
1409 return PyList_New(0);
1410
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001411 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001412 self->extra->length,
1413 &start, &stop, &step, &slicelen) < 0) {
1414 return NULL;
1415 }
1416
1417 if (slicelen <= 0)
1418 return PyList_New(0);
1419 else {
1420 list = PyList_New(slicelen);
1421 if (!list)
1422 return NULL;
1423
1424 for (cur = start, i = 0; i < slicelen;
1425 cur += step, i++) {
1426 PyObject* item = self->extra->children[cur];
1427 Py_INCREF(item);
1428 PyList_SET_ITEM(list, i, item);
1429 }
1430
1431 return list;
1432 }
1433 }
1434 else {
1435 PyErr_SetString(PyExc_TypeError,
1436 "element indices must be integers");
1437 return NULL;
1438 }
1439}
1440
1441static int
1442element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1443{
1444 ElementObject* self = (ElementObject*) self_;
1445
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001446 if (PyIndex_Check(item)) {
1447 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001448
1449 if (i == -1 && PyErr_Occurred()) {
1450 return -1;
1451 }
1452 if (i < 0 && self->extra)
1453 i += self->extra->length;
1454 return element_setitem(self_, i, value);
1455 }
1456 else if (PySlice_Check(item)) {
1457 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1458
1459 PyObject* recycle = NULL;
1460 PyObject* seq = NULL;
1461
1462 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001463 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001464
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001465 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001466 self->extra->length,
1467 &start, &stop, &step, &slicelen) < 0) {
1468 return -1;
1469 }
1470
Eli Bendersky865756a2012-03-09 13:38:15 +02001471 if (value == NULL) {
1472 /* Delete slice */
1473 size_t cur;
1474 Py_ssize_t i;
1475
1476 if (slicelen <= 0)
1477 return 0;
1478
1479 /* Since we're deleting, the direction of the range doesn't matter,
1480 * so for simplicity make it always ascending.
1481 */
1482 if (step < 0) {
1483 stop = start + 1;
1484 start = stop + step * (slicelen - 1) - 1;
1485 step = -step;
1486 }
1487
1488 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1489
1490 /* recycle is a list that will contain all the children
1491 * scheduled for removal.
1492 */
1493 if (!(recycle = PyList_New(slicelen))) {
1494 PyErr_NoMemory();
1495 return -1;
1496 }
1497
1498 /* This loop walks over all the children that have to be deleted,
1499 * with cur pointing at them. num_moved is the amount of children
1500 * until the next deleted child that have to be "shifted down" to
1501 * occupy the deleted's places.
1502 * Note that in the ith iteration, shifting is done i+i places down
1503 * because i children were already removed.
1504 */
1505 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1506 /* Compute how many children have to be moved, clipping at the
1507 * list end.
1508 */
1509 Py_ssize_t num_moved = step - 1;
1510 if (cur + step >= (size_t)self->extra->length) {
1511 num_moved = self->extra->length - cur - 1;
1512 }
1513
1514 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1515
1516 memmove(
1517 self->extra->children + cur - i,
1518 self->extra->children + cur + 1,
1519 num_moved * sizeof(PyObject *));
1520 }
1521
1522 /* Leftover "tail" after the last removed child */
1523 cur = start + (size_t)slicelen * step;
1524 if (cur < (size_t)self->extra->length) {
1525 memmove(
1526 self->extra->children + cur - slicelen,
1527 self->extra->children + cur,
1528 (self->extra->length - cur) * sizeof(PyObject *));
1529 }
1530
1531 self->extra->length -= slicelen;
1532
1533 /* Discard the recycle list with all the deleted sub-elements */
1534 Py_XDECREF(recycle);
1535 return 0;
1536 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001537 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001538 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001539 seq = PySequence_Fast(value, "");
1540 if (!seq) {
1541 PyErr_Format(
1542 PyExc_TypeError,
1543 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1544 );
1545 return -1;
1546 }
1547 newlen = PySequence_Size(seq);
1548 }
1549
1550 if (step != 1 && newlen != slicelen)
1551 {
1552 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001553 "attempt to assign sequence of size %zd "
1554 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001555 newlen, slicelen
1556 );
1557 return -1;
1558 }
1559
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001560 /* Resize before creating the recycle bin, to prevent refleaks. */
1561 if (newlen > slicelen) {
1562 if (element_resize(self, newlen - slicelen) < 0) {
1563 if (seq) {
1564 Py_DECREF(seq);
1565 }
1566 return -1;
1567 }
1568 }
1569
1570 if (slicelen > 0) {
1571 /* to avoid recursive calls to this method (via decref), move
1572 old items to the recycle bin here, and get rid of them when
1573 we're done modifying the element */
1574 recycle = PyList_New(slicelen);
1575 if (!recycle) {
1576 if (seq) {
1577 Py_DECREF(seq);
1578 }
1579 return -1;
1580 }
1581 for (cur = start, i = 0; i < slicelen;
1582 cur += step, i++)
1583 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1584 }
1585
1586 if (newlen < slicelen) {
1587 /* delete slice */
1588 for (i = stop; i < self->extra->length; i++)
1589 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1590 } else if (newlen > slicelen) {
1591 /* insert slice */
1592 for (i = self->extra->length-1; i >= stop; i--)
1593 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1594 }
1595
1596 /* replace the slice */
1597 for (cur = start, i = 0; i < newlen;
1598 cur += step, i++) {
1599 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1600 Py_INCREF(element);
1601 self->extra->children[cur] = element;
1602 }
1603
1604 self->extra->length += newlen - slicelen;
1605
1606 if (seq) {
1607 Py_DECREF(seq);
1608 }
1609
1610 /* discard the recycle bin, and everything in it */
1611 Py_XDECREF(recycle);
1612
1613 return 0;
1614 }
1615 else {
1616 PyErr_SetString(PyExc_TypeError,
1617 "element indices must be integers");
1618 return -1;
1619 }
1620}
1621
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001622static PyMethodDef element_methods[] = {
1623
Eli Bendersky0192ba32012-03-30 16:38:33 +03001624 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001625
1626 {"get", (PyCFunction) element_get, METH_VARARGS},
1627 {"set", (PyCFunction) element_set, METH_VARARGS},
1628
Eli Bendersky737b1732012-05-29 06:02:56 +03001629 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1630 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1631 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001632
1633 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001634 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001635 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1636 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1637
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001638 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1639 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001640 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001641
1642 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001643 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1644
1645 {"items", (PyCFunction) element_items, METH_VARARGS},
1646 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1647
1648 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1649
1650 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1651 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1652
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001653 {NULL, NULL}
1654};
1655
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001656static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001657element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001658{
1659 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001660 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001661
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001662 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001663 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001664
Alexander Belopolskye239d232010-12-08 23:31:48 +00001665 if (name == NULL)
1666 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001667
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001668 /* handle common attributes first */
1669 if (strcmp(name, "tag") == 0) {
1670 res = self->tag;
1671 Py_INCREF(res);
1672 return res;
1673 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001674 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001675 Py_INCREF(res);
1676 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001677 }
1678
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001679 /* methods */
1680 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1681 if (res)
1682 return res;
1683
1684 /* less common attributes */
1685 if (strcmp(name, "tail") == 0) {
1686 PyErr_Clear();
1687 res = element_get_tail(self);
1688 } else if (strcmp(name, "attrib") == 0) {
1689 PyErr_Clear();
1690 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001691 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001692 res = element_get_attrib(self);
1693 }
1694
1695 if (!res)
1696 return NULL;
1697
1698 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001699 return res;
1700}
1701
Eli Benderskyb20df952012-05-20 06:33:29 +03001702static PyObject*
1703element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001704{
Eli Benderskyb20df952012-05-20 06:33:29 +03001705 char *name = "";
1706 if (PyUnicode_Check(nameobj))
1707 name = _PyUnicode_AsString(nameobj);
1708
1709 if (name == NULL)
1710 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001711
1712 if (strcmp(name, "tag") == 0) {
1713 Py_DECREF(self->tag);
1714 self->tag = value;
1715 Py_INCREF(self->tag);
1716 } else if (strcmp(name, "text") == 0) {
1717 Py_DECREF(JOIN_OBJ(self->text));
1718 self->text = value;
1719 Py_INCREF(self->text);
1720 } else if (strcmp(name, "tail") == 0) {
1721 Py_DECREF(JOIN_OBJ(self->tail));
1722 self->tail = value;
1723 Py_INCREF(self->tail);
1724 } else if (strcmp(name, "attrib") == 0) {
1725 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001726 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001727 Py_DECREF(self->extra->attrib);
1728 self->extra->attrib = value;
1729 Py_INCREF(self->extra->attrib);
1730 } else {
1731 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001732 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001733 }
1734
Eli Benderskyb20df952012-05-20 06:33:29 +03001735 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001736}
1737
1738static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001739 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001740 0, /* sq_concat */
1741 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001742 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001743 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001744 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001745 0,
1746};
1747
1748static PyMappingMethods element_as_mapping = {
1749 (lenfunc) element_length,
1750 (binaryfunc) element_subscr,
1751 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001752};
1753
Neal Norwitz227b5332006-03-22 09:28:35 +00001754static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001755 PyVarObject_HEAD_INIT(NULL, 0)
1756 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001757 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001758 (destructor)element_dealloc, /* tp_dealloc */
1759 0, /* tp_print */
1760 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001761 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001762 0, /* tp_reserved */
1763 (reprfunc)element_repr, /* tp_repr */
1764 0, /* tp_as_number */
1765 &element_as_sequence, /* tp_as_sequence */
1766 &element_as_mapping, /* tp_as_mapping */
1767 0, /* tp_hash */
1768 0, /* tp_call */
1769 0, /* tp_str */
1770 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001771 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001772 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001773 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1774 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001775 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001776 (traverseproc)element_gc_traverse, /* tp_traverse */
1777 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001778 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001779 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001780 0, /* tp_iter */
1781 0, /* tp_iternext */
1782 element_methods, /* tp_methods */
1783 0, /* tp_members */
1784 0, /* tp_getset */
1785 0, /* tp_base */
1786 0, /* tp_dict */
1787 0, /* tp_descr_get */
1788 0, /* tp_descr_set */
1789 0, /* tp_dictoffset */
1790 (initproc)element_init, /* tp_init */
1791 PyType_GenericAlloc, /* tp_alloc */
1792 element_new, /* tp_new */
1793 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001794};
1795
1796/* ==================================================================== */
1797/* the tree builder type */
1798
1799typedef struct {
1800 PyObject_HEAD
1801
Eli Bendersky58d548d2012-05-29 15:45:16 +03001802 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001803
Eli Bendersky58d548d2012-05-29 15:45:16 +03001804 ElementObject *this; /* current node */
1805 ElementObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001806
Eli Bendersky58d548d2012-05-29 15:45:16 +03001807 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001808
Eli Bendersky58d548d2012-05-29 15:45:16 +03001809 PyObject *stack; /* element stack */
1810 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001811
Eli Bendersky48d358b2012-05-30 17:57:50 +03001812 PyObject *element_factory;
1813
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001814 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03001815 PyObject *events; /* list of events, or NULL if not collecting */
1816 PyObject *start_event_obj; /* event objects (NULL to ignore) */
1817 PyObject *end_event_obj;
1818 PyObject *start_ns_event_obj;
1819 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001820} TreeBuilderObject;
1821
Neal Norwitz227b5332006-03-22 09:28:35 +00001822static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001823
Christian Heimes90aa7642007-12-19 02:45:37 +00001824#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001825
1826/* -------------------------------------------------------------------- */
1827/* constructor and destructor */
1828
Eli Bendersky58d548d2012-05-29 15:45:16 +03001829static PyObject *
1830treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001831{
Eli Bendersky58d548d2012-05-29 15:45:16 +03001832 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
1833 if (t != NULL) {
1834 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001835
Eli Bendersky58d548d2012-05-29 15:45:16 +03001836 Py_INCREF(Py_None);
1837 t->this = (ElementObject *)Py_None;
1838 Py_INCREF(Py_None);
1839 t->last = (ElementObject *)Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001840
Eli Bendersky58d548d2012-05-29 15:45:16 +03001841 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03001842 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03001843 t->stack = PyList_New(20);
1844 if (!t->stack) {
1845 Py_DECREF(t->this);
1846 Py_DECREF(t->last);
1847 return NULL;
1848 }
1849 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001850
Eli Bendersky58d548d2012-05-29 15:45:16 +03001851 t->events = NULL;
1852 t->start_event_obj = t->end_event_obj = NULL;
1853 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
1854 }
1855 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001856}
1857
Eli Bendersky58d548d2012-05-29 15:45:16 +03001858static int
1859treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001860{
Eli Bendersky48d358b2012-05-30 17:57:50 +03001861 static char *kwlist[] = {"element_factory", NULL};
1862 PyObject *element_factory = NULL;
1863 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
1864
1865 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
1866 &element_factory)) {
1867 return -1;
1868 }
1869
1870 if (element_factory) {
1871 Py_INCREF(element_factory);
1872 Py_XDECREF(self_tb->element_factory);
1873 self_tb->element_factory = element_factory;
1874 }
1875
Eli Bendersky58d548d2012-05-29 15:45:16 +03001876 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001877}
1878
Eli Bendersky48d358b2012-05-30 17:57:50 +03001879static int
1880treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
1881{
1882 Py_VISIT(self->root);
1883 Py_VISIT(self->this);
1884 Py_VISIT(self->last);
1885 Py_VISIT(self->data);
1886 Py_VISIT(self->stack);
1887 Py_VISIT(self->element_factory);
1888 return 0;
1889}
1890
1891static int
1892treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001893{
1894 Py_XDECREF(self->end_ns_event_obj);
1895 Py_XDECREF(self->start_ns_event_obj);
1896 Py_XDECREF(self->end_event_obj);
1897 Py_XDECREF(self->start_event_obj);
1898 Py_XDECREF(self->events);
1899 Py_DECREF(self->stack);
1900 Py_XDECREF(self->data);
1901 Py_DECREF(self->last);
1902 Py_DECREF(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03001903 Py_CLEAR(self->element_factory);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001904 Py_XDECREF(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03001905 return 0;
1906}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001907
Eli Bendersky48d358b2012-05-30 17:57:50 +03001908static void
1909treebuilder_dealloc(TreeBuilderObject *self)
1910{
1911 PyObject_GC_UnTrack(self);
1912 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03001913 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001914}
1915
1916/* -------------------------------------------------------------------- */
1917/* handlers */
1918
1919LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001920treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1921 PyObject* attrib)
1922{
1923 PyObject* node;
1924 PyObject* this;
1925
1926 if (self->data) {
1927 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001928 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001929 self->last->text = JOIN_SET(
1930 self->data, PyList_CheckExact(self->data)
1931 );
1932 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001933 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001934 self->last->tail = JOIN_SET(
1935 self->data, PyList_CheckExact(self->data)
1936 );
1937 }
1938 self->data = NULL;
1939 }
1940
Eli Bendersky48d358b2012-05-30 17:57:50 +03001941 if (self->element_factory) {
1942 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
1943 } else {
1944 node = create_new_element(tag, attrib);
1945 }
1946 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001947 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03001948 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001949
1950 this = (PyObject*) self->this;
1951
1952 if (this != Py_None) {
1953 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001954 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001955 } else {
1956 if (self->root) {
1957 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001958 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001959 "multiple elements on top level"
1960 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001961 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001962 }
1963 Py_INCREF(node);
1964 self->root = node;
1965 }
1966
1967 if (self->index < PyList_GET_SIZE(self->stack)) {
1968 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001969 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001970 Py_INCREF(this);
1971 } else {
1972 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001973 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001974 }
1975 self->index++;
1976
1977 Py_DECREF(this);
1978 Py_INCREF(node);
1979 self->this = (ElementObject*) node;
1980
1981 Py_DECREF(self->last);
1982 Py_INCREF(node);
1983 self->last = (ElementObject*) node;
1984
1985 if (self->start_event_obj) {
1986 PyObject* res;
1987 PyObject* action = self->start_event_obj;
1988 res = PyTuple_New(2);
1989 if (res) {
1990 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1991 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1992 PyList_Append(self->events, res);
1993 Py_DECREF(res);
1994 } else
1995 PyErr_Clear(); /* FIXME: propagate error */
1996 }
1997
1998 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001999
2000 error:
2001 Py_DECREF(node);
2002 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002003}
2004
2005LOCAL(PyObject*)
2006treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2007{
2008 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002009 if (self->last == (ElementObject*) Py_None) {
2010 /* ignore calls to data before the first call to start */
2011 Py_RETURN_NONE;
2012 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002013 /* store the first item as is */
2014 Py_INCREF(data); self->data = data;
2015 } else {
2016 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002017 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2018 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002019 /* expat often generates single character data sections; handle
2020 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002021 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2022 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002023 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002024 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002025 } else if (PyList_CheckExact(self->data)) {
2026 if (PyList_Append(self->data, data) < 0)
2027 return NULL;
2028 } else {
2029 PyObject* list = PyList_New(2);
2030 if (!list)
2031 return NULL;
2032 PyList_SET_ITEM(list, 0, self->data);
2033 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2034 self->data = list;
2035 }
2036 }
2037
2038 Py_RETURN_NONE;
2039}
2040
2041LOCAL(PyObject*)
2042treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2043{
2044 PyObject* item;
2045
2046 if (self->data) {
2047 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002048 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002049 self->last->text = JOIN_SET(
2050 self->data, PyList_CheckExact(self->data)
2051 );
2052 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002053 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002054 self->last->tail = JOIN_SET(
2055 self->data, PyList_CheckExact(self->data)
2056 );
2057 }
2058 self->data = NULL;
2059 }
2060
2061 if (self->index == 0) {
2062 PyErr_SetString(
2063 PyExc_IndexError,
2064 "pop from empty stack"
2065 );
2066 return NULL;
2067 }
2068
2069 self->index--;
2070
2071 item = PyList_GET_ITEM(self->stack, self->index);
2072 Py_INCREF(item);
2073
2074 Py_DECREF(self->last);
2075
2076 self->last = (ElementObject*) self->this;
2077 self->this = (ElementObject*) item;
2078
2079 if (self->end_event_obj) {
2080 PyObject* res;
2081 PyObject* action = self->end_event_obj;
2082 PyObject* node = (PyObject*) self->last;
2083 res = PyTuple_New(2);
2084 if (res) {
2085 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
2086 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
2087 PyList_Append(self->events, res);
2088 Py_DECREF(res);
2089 } else
2090 PyErr_Clear(); /* FIXME: propagate error */
2091 }
2092
2093 Py_INCREF(self->last);
2094 return (PyObject*) self->last;
2095}
2096
2097LOCAL(void)
2098treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002099 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002100{
2101 PyObject* res;
2102 PyObject* action;
2103 PyObject* parcel;
2104
2105 if (!self->events)
2106 return;
2107
2108 if (start) {
2109 if (!self->start_ns_event_obj)
2110 return;
2111 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002112 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002113 if (!parcel)
2114 return;
2115 Py_INCREF(action);
2116 } else {
2117 if (!self->end_ns_event_obj)
2118 return;
2119 action = self->end_ns_event_obj;
2120 Py_INCREF(action);
2121 parcel = Py_None;
2122 Py_INCREF(parcel);
2123 }
2124
2125 res = PyTuple_New(2);
2126
2127 if (res) {
2128 PyTuple_SET_ITEM(res, 0, action);
2129 PyTuple_SET_ITEM(res, 1, parcel);
2130 PyList_Append(self->events, res);
2131 Py_DECREF(res);
2132 } else
2133 PyErr_Clear(); /* FIXME: propagate error */
2134}
2135
2136/* -------------------------------------------------------------------- */
2137/* methods (in alphabetical order) */
2138
2139static PyObject*
2140treebuilder_data(TreeBuilderObject* self, PyObject* args)
2141{
2142 PyObject* data;
2143 if (!PyArg_ParseTuple(args, "O:data", &data))
2144 return NULL;
2145
2146 return treebuilder_handle_data(self, data);
2147}
2148
2149static PyObject*
2150treebuilder_end(TreeBuilderObject* self, PyObject* args)
2151{
2152 PyObject* tag;
2153 if (!PyArg_ParseTuple(args, "O:end", &tag))
2154 return NULL;
2155
2156 return treebuilder_handle_end(self, tag);
2157}
2158
2159LOCAL(PyObject*)
2160treebuilder_done(TreeBuilderObject* self)
2161{
2162 PyObject* res;
2163
2164 /* FIXME: check stack size? */
2165
2166 if (self->root)
2167 res = self->root;
2168 else
2169 res = Py_None;
2170
2171 Py_INCREF(res);
2172 return res;
2173}
2174
2175static PyObject*
2176treebuilder_close(TreeBuilderObject* self, PyObject* args)
2177{
2178 if (!PyArg_ParseTuple(args, ":close"))
2179 return NULL;
2180
2181 return treebuilder_done(self);
2182}
2183
2184static PyObject*
2185treebuilder_start(TreeBuilderObject* self, PyObject* args)
2186{
2187 PyObject* tag;
2188 PyObject* attrib = Py_None;
2189 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2190 return NULL;
2191
2192 return treebuilder_handle_start(self, tag, attrib);
2193}
2194
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002195static PyMethodDef treebuilder_methods[] = {
2196 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2197 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2198 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002199 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2200 {NULL, NULL}
2201};
2202
Neal Norwitz227b5332006-03-22 09:28:35 +00002203static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002204 PyVarObject_HEAD_INIT(NULL, 0)
2205 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002206 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002207 (destructor)treebuilder_dealloc, /* tp_dealloc */
2208 0, /* tp_print */
2209 0, /* tp_getattr */
2210 0, /* tp_setattr */
2211 0, /* tp_reserved */
2212 0, /* tp_repr */
2213 0, /* tp_as_number */
2214 0, /* tp_as_sequence */
2215 0, /* tp_as_mapping */
2216 0, /* tp_hash */
2217 0, /* tp_call */
2218 0, /* tp_str */
2219 0, /* tp_getattro */
2220 0, /* tp_setattro */
2221 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002222 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2223 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002224 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002225 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2226 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002227 0, /* tp_richcompare */
2228 0, /* tp_weaklistoffset */
2229 0, /* tp_iter */
2230 0, /* tp_iternext */
2231 treebuilder_methods, /* tp_methods */
2232 0, /* tp_members */
2233 0, /* tp_getset */
2234 0, /* tp_base */
2235 0, /* tp_dict */
2236 0, /* tp_descr_get */
2237 0, /* tp_descr_set */
2238 0, /* tp_dictoffset */
2239 (initproc)treebuilder_init, /* tp_init */
2240 PyType_GenericAlloc, /* tp_alloc */
2241 treebuilder_new, /* tp_new */
2242 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002243};
2244
2245/* ==================================================================== */
2246/* the expat interface */
2247
2248#if defined(USE_EXPAT)
2249
2250#include "expat.h"
2251
/* EXPAT(name) selects how an expat entry point is reached: through the
   expat_capi function table when USE_PYEXPAT_CAPI is defined, otherwise
   directly as the XML_-prefixed symbol. */
#ifdef USE_PYEXPAT_CAPI
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
2259
Eli Bendersky52467b12012-06-01 07:13:08 +03002260static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2261 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2262
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002263typedef struct {
2264 PyObject_HEAD
2265
2266 XML_Parser parser;
2267
2268 PyObject* target;
2269 PyObject* entity;
2270
2271 PyObject* names;
2272
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002273 PyObject* handle_start;
2274 PyObject* handle_data;
2275 PyObject* handle_end;
2276
2277 PyObject* handle_comment;
2278 PyObject* handle_pi;
2279
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002280 PyObject* handle_close;
2281
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002282} XMLParserObject;
2283
Neal Norwitz227b5332006-03-22 09:28:35 +00002284static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002285
2286/* helpers */
2287
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002288LOCAL(PyObject*)
2289makeuniversal(XMLParserObject* self, const char* string)
2290{
2291 /* convert a UTF-8 tag/attribute name from the expat parser
2292 to a universal name string */
2293
2294 int size = strlen(string);
2295 PyObject* key;
2296 PyObject* value;
2297
2298 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002299 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002300 if (!key)
2301 return NULL;
2302
2303 value = PyDict_GetItem(self->names, key);
2304
2305 if (value) {
2306 Py_INCREF(value);
2307 } else {
2308 /* new name. convert to universal name, and decode as
2309 necessary */
2310
2311 PyObject* tag;
2312 char* p;
2313 int i;
2314
2315 /* look for namespace separator */
2316 for (i = 0; i < size; i++)
2317 if (string[i] == '}')
2318 break;
2319 if (i != size) {
2320 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002321 tag = PyBytes_FromStringAndSize(NULL, size+1);
2322 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002323 p[0] = '{';
2324 memcpy(p+1, string, size);
2325 size++;
2326 } else {
2327 /* plain name; use key as tag */
2328 Py_INCREF(key);
2329 tag = key;
2330 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002331
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002332 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002333 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002334 value = PyUnicode_DecodeUTF8(p, size, "strict");
2335 Py_DECREF(tag);
2336 if (!value) {
2337 Py_DECREF(key);
2338 return NULL;
2339 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002340
2341 /* add to names dictionary */
2342 if (PyDict_SetItem(self->names, key, value) < 0) {
2343 Py_DECREF(key);
2344 Py_DECREF(value);
2345 return NULL;
2346 }
2347 }
2348
2349 Py_DECREF(key);
2350 return value;
2351}
2352
Eli Bendersky5b77d812012-03-16 08:20:05 +02002353/* Set the ParseError exception with the given parameters.
2354 * If message is not NULL, it's used as the error string. Otherwise, the
2355 * message string is the default for the given error_code.
2356*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002357static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002358expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002359{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002360 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002361
Victor Stinner499dfcf2011-03-21 13:26:24 +01002362 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002363 message ? message : EXPAT(ErrorString)(error_code),
2364 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002365 if (errmsg == NULL)
2366 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002367
Victor Stinner499dfcf2011-03-21 13:26:24 +01002368 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2369 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002370 if (!error)
2371 return;
2372
Eli Bendersky5b77d812012-03-16 08:20:05 +02002373 /* Add code and position attributes */
2374 code = PyLong_FromLong((long)error_code);
2375 if (!code) {
2376 Py_DECREF(error);
2377 return;
2378 }
2379 if (PyObject_SetAttrString(error, "code", code) == -1) {
2380 Py_DECREF(error);
2381 Py_DECREF(code);
2382 return;
2383 }
2384 Py_DECREF(code);
2385
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002386 position = Py_BuildValue("(ii)", line, column);
2387 if (!position) {
2388 Py_DECREF(error);
2389 return;
2390 }
2391 if (PyObject_SetAttrString(error, "position", position) == -1) {
2392 Py_DECREF(error);
2393 Py_DECREF(position);
2394 return;
2395 }
2396 Py_DECREF(position);
2397
2398 PyErr_SetObject(elementtree_parseerror_obj, error);
2399 Py_DECREF(error);
2400}
2401
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002402/* -------------------------------------------------------------------- */
2403/* handlers */
2404
2405static void
2406expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2407 int data_len)
2408{
2409 PyObject* key;
2410 PyObject* value;
2411 PyObject* res;
2412
2413 if (data_len < 2 || data_in[0] != '&')
2414 return;
2415
Neal Norwitz0269b912007-08-08 06:56:02 +00002416 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002417 if (!key)
2418 return;
2419
2420 value = PyDict_GetItem(self->entity, key);
2421
2422 if (value) {
2423 if (TreeBuilder_CheckExact(self->target))
2424 res = treebuilder_handle_data(
2425 (TreeBuilderObject*) self->target, value
2426 );
2427 else if (self->handle_data)
2428 res = PyObject_CallFunction(self->handle_data, "O", value);
2429 else
2430 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002431 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002432 } else if (!PyErr_Occurred()) {
2433 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002434 char message[128] = "undefined entity ";
2435 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002436 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002437 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002438 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002439 EXPAT(GetErrorColumnNumber)(self->parser),
2440 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002441 );
2442 }
2443
2444 Py_DECREF(key);
2445}
2446
2447static void
2448expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2449 const XML_Char **attrib_in)
2450{
2451 PyObject* res;
2452 PyObject* tag;
2453 PyObject* attrib;
2454 int ok;
2455
2456 /* tag name */
2457 tag = makeuniversal(self, tag_in);
2458 if (!tag)
2459 return; /* parser will look for errors */
2460
2461 /* attributes */
2462 if (attrib_in[0]) {
2463 attrib = PyDict_New();
2464 if (!attrib)
2465 return;
2466 while (attrib_in[0] && attrib_in[1]) {
2467 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002468 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002469 if (!key || !value) {
2470 Py_XDECREF(value);
2471 Py_XDECREF(key);
2472 Py_DECREF(attrib);
2473 return;
2474 }
2475 ok = PyDict_SetItem(attrib, key, value);
2476 Py_DECREF(value);
2477 Py_DECREF(key);
2478 if (ok < 0) {
2479 Py_DECREF(attrib);
2480 return;
2481 }
2482 attrib_in += 2;
2483 }
2484 } else {
2485 Py_INCREF(Py_None);
2486 attrib = Py_None;
2487 }
2488
Eli Bendersky48d358b2012-05-30 17:57:50 +03002489 /* If we get None, pass an empty dictionary on */
2490 if (attrib == Py_None) {
2491 Py_DECREF(attrib);
2492 attrib = PyDict_New();
2493 if (!attrib)
2494 return;
2495 }
2496
2497 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002498 /* shortcut */
2499 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2500 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002501 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002502 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002503 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002504 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002505 res = NULL;
2506
2507 Py_DECREF(tag);
2508 Py_DECREF(attrib);
2509
2510 Py_XDECREF(res);
2511}
2512
2513static void
2514expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2515 int data_len)
2516{
2517 PyObject* data;
2518 PyObject* res;
2519
Neal Norwitz0269b912007-08-08 06:56:02 +00002520 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002521 if (!data)
2522 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002523
2524 if (TreeBuilder_CheckExact(self->target))
2525 /* shortcut */
2526 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2527 else if (self->handle_data)
2528 res = PyObject_CallFunction(self->handle_data, "O", data);
2529 else
2530 res = NULL;
2531
2532 Py_DECREF(data);
2533
2534 Py_XDECREF(res);
2535}
2536
2537static void
2538expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2539{
2540 PyObject* tag;
2541 PyObject* res = NULL;
2542
2543 if (TreeBuilder_CheckExact(self->target))
2544 /* shortcut */
2545 /* the standard tree builder doesn't look at the end tag */
2546 res = treebuilder_handle_end(
2547 (TreeBuilderObject*) self->target, Py_None
2548 );
2549 else if (self->handle_end) {
2550 tag = makeuniversal(self, tag_in);
2551 if (tag) {
2552 res = PyObject_CallFunction(self->handle_end, "O", tag);
2553 Py_DECREF(tag);
2554 }
2555 }
2556
2557 Py_XDECREF(res);
2558}
2559
2560static void
2561expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2562 const XML_Char *uri)
2563{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002564 PyObject* sprefix = NULL;
2565 PyObject* suri = NULL;
2566
2567 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2568 if (!suri)
2569 return;
2570
2571 if (prefix)
2572 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2573 else
2574 sprefix = PyUnicode_FromString("");
2575 if (!sprefix) {
2576 Py_DECREF(suri);
2577 return;
2578 }
2579
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002580 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002581 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002582 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002583
2584 Py_DECREF(sprefix);
2585 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002586}
2587
2588static void
2589expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2590{
2591 treebuilder_handle_namespace(
2592 (TreeBuilderObject*) self->target, 0, NULL, NULL
2593 );
2594}
2595
2596static void
2597expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2598{
2599 PyObject* comment;
2600 PyObject* res;
2601
2602 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002603 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002604 if (comment) {
2605 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2606 Py_XDECREF(res);
2607 Py_DECREF(comment);
2608 }
2609 }
2610}
2611
2612static void
2613expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2614 const XML_Char* data_in)
2615{
2616 PyObject* target;
2617 PyObject* data;
2618 PyObject* res;
2619
2620 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002621 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2622 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002623 if (target && data) {
2624 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2625 Py_XDECREF(res);
2626 Py_DECREF(data);
2627 Py_DECREF(target);
2628 } else {
2629 Py_XDECREF(data);
2630 Py_XDECREF(target);
2631 }
2632 }
2633}
2634
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002635static int
2636expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2637 XML_Encoding *info)
2638{
2639 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002640 unsigned char s[256];
2641 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002642 void *data;
2643 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002644
2645 memset(info, 0, sizeof(XML_Encoding));
2646
2647 for (i = 0; i < 256; i++)
2648 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002649
Fredrik Lundhc3389992005-12-25 11:40:19 +00002650 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002651 if (!u)
2652 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002653 if (PyUnicode_READY(u))
2654 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002655
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002656 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002657 Py_DECREF(u);
2658 return XML_STATUS_ERROR;
2659 }
2660
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002661 kind = PyUnicode_KIND(u);
2662 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002664 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2665 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2666 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002667 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002668 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002669 }
2670
2671 Py_DECREF(u);
2672
2673 return XML_STATUS_OK;
2674}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002675
2676/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002677
Eli Bendersky52467b12012-06-01 07:13:08 +03002678static PyObject *
2679xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002680{
Eli Bendersky52467b12012-06-01 07:13:08 +03002681 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
2682 if (self) {
2683 self->parser = NULL;
2684 self->target = self->entity = self->names = NULL;
2685 self->handle_start = self->handle_data = self->handle_end = NULL;
2686 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002687 }
Eli Bendersky52467b12012-06-01 07:13:08 +03002688 return (PyObject *)self;
2689}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002690
Eli Bendersky52467b12012-06-01 07:13:08 +03002691static int
2692xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
2693{
2694 XMLParserObject *self_xp = (XMLParserObject *)self;
2695 PyObject *target = NULL, *html = NULL;
2696 char *encoding = NULL;
2697 static char *kwlist[] = {"html", "target", "encoding"};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002698
Eli Bendersky52467b12012-06-01 07:13:08 +03002699 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
2700 &html, &target, &encoding)) {
2701 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002702 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002703
Eli Bendersky52467b12012-06-01 07:13:08 +03002704 self_xp->entity = PyDict_New();
2705 if (!self_xp->entity)
2706 return -1;
2707
2708 self_xp->names = PyDict_New();
2709 if (!self_xp->names) {
2710 Py_XDECREF(self_xp->entity);
2711 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002712 }
2713
Eli Bendersky52467b12012-06-01 07:13:08 +03002714 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
2715 if (!self_xp->parser) {
2716 Py_XDECREF(self_xp->entity);
2717 Py_XDECREF(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03002719 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002720 }
2721
Eli Bendersky52467b12012-06-01 07:13:08 +03002722 if (target) {
2723 Py_INCREF(target);
2724 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03002725 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002726 if (!target) {
Eli Bendersky52467b12012-06-01 07:13:08 +03002727 Py_XDECREF(self_xp->entity);
2728 Py_XDECREF(self_xp->names);
2729 EXPAT(ParserFree)(self_xp->parser);
2730 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731 }
Eli Bendersky52467b12012-06-01 07:13:08 +03002732 }
2733 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734
Eli Bendersky52467b12012-06-01 07:13:08 +03002735 self_xp->handle_start = PyObject_GetAttrString(target, "start");
2736 self_xp->handle_data = PyObject_GetAttrString(target, "data");
2737 self_xp->handle_end = PyObject_GetAttrString(target, "end");
2738 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
2739 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
2740 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002741
2742 PyErr_Clear();
Eli Bendersky52467b12012-06-01 07:13:08 +03002743
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03002745 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002747 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002748 (XML_StartElementHandler) expat_start_handler,
2749 (XML_EndElementHandler) expat_end_handler
2750 );
2751 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002752 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753 (XML_DefaultHandler) expat_default_handler
2754 );
2755 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002756 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002757 (XML_CharacterDataHandler) expat_data_handler
2758 );
Eli Bendersky52467b12012-06-01 07:13:08 +03002759 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002760 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002761 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002762 (XML_CommentHandler) expat_comment_handler
2763 );
Eli Bendersky52467b12012-06-01 07:13:08 +03002764 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002765 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002766 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002767 (XML_ProcessingInstructionHandler) expat_pi_handler
2768 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002769 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002770 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002771 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2772 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002773
Eli Bendersky52467b12012-06-01 07:13:08 +03002774 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002775}
2776
Eli Bendersky52467b12012-06-01 07:13:08 +03002777static int
2778xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
2779{
2780 Py_VISIT(self->handle_close);
2781 Py_VISIT(self->handle_pi);
2782 Py_VISIT(self->handle_comment);
2783 Py_VISIT(self->handle_end);
2784 Py_VISIT(self->handle_data);
2785 Py_VISIT(self->handle_start);
2786
2787 Py_VISIT(self->target);
2788 Py_VISIT(self->entity);
2789 Py_VISIT(self->names);
2790
2791 return 0;
2792}
2793
2794static int
2795xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002796{
2797 EXPAT(ParserFree)(self->parser);
2798
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002799 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002800 Py_XDECREF(self->handle_pi);
2801 Py_XDECREF(self->handle_comment);
2802 Py_XDECREF(self->handle_end);
2803 Py_XDECREF(self->handle_data);
2804 Py_XDECREF(self->handle_start);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002805
Eli Bendersky52467b12012-06-01 07:13:08 +03002806 Py_XDECREF(self->target);
2807 Py_XDECREF(self->entity);
2808 Py_XDECREF(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002809
Eli Bendersky52467b12012-06-01 07:13:08 +03002810 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002811}
2812
Eli Bendersky52467b12012-06-01 07:13:08 +03002813static void
2814xmlparser_dealloc(XMLParserObject* self)
2815{
2816 PyObject_GC_UnTrack(self);
2817 xmlparser_gc_clear(self);
2818 Py_TYPE(self)->tp_free((PyObject *)self);
2819}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002820
2821LOCAL(PyObject*)
2822expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2823{
2824 int ok;
2825
2826 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2827
2828 if (PyErr_Occurred())
2829 return NULL;
2830
2831 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002832 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002833 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002834 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002835 EXPAT(GetErrorColumnNumber)(self->parser),
2836 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002837 );
2838 return NULL;
2839 }
2840
2841 Py_RETURN_NONE;
2842}
2843
2844static PyObject*
2845xmlparser_close(XMLParserObject* self, PyObject* args)
2846{
2847 /* end feeding data to parser */
2848
2849 PyObject* res;
2850 if (!PyArg_ParseTuple(args, ":close"))
2851 return NULL;
2852
2853 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002854 if (!res)
2855 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002856
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002857 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002858 Py_DECREF(res);
2859 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002860 } if (self->handle_close) {
2861 Py_DECREF(res);
2862 return PyObject_CallFunction(self->handle_close, "");
2863 } else
2864 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002865}
2866
2867static PyObject*
2868xmlparser_feed(XMLParserObject* self, PyObject* args)
2869{
2870 /* feed data to parser */
2871
2872 char* data;
2873 int data_len;
2874 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2875 return NULL;
2876
2877 return expat_parse(self, data, data_len, 0);
2878}
2879
2880static PyObject*
2881xmlparser_parse(XMLParserObject* self, PyObject* args)
2882{
2883 /* (internal) parse until end of input stream */
2884
2885 PyObject* reader;
2886 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02002887 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002888 PyObject* res;
2889
2890 PyObject* fileobj;
2891 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2892 return NULL;
2893
2894 reader = PyObject_GetAttrString(fileobj, "read");
2895 if (!reader)
2896 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002897
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002898 /* read from open file object */
2899 for (;;) {
2900
2901 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2902
2903 if (!buffer) {
2904 /* read failed (e.g. due to KeyboardInterrupt) */
2905 Py_DECREF(reader);
2906 return NULL;
2907 }
2908
Eli Benderskyf996e772012-03-16 05:53:30 +02002909 if (PyUnicode_CheckExact(buffer)) {
2910 /* A unicode object is encoded into bytes using UTF-8 */
2911 if (PyUnicode_GET_SIZE(buffer) == 0) {
2912 Py_DECREF(buffer);
2913 break;
2914 }
2915 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
2916 if (!temp) {
2917 /* Propagate exception from PyUnicode_AsEncodedString */
2918 Py_DECREF(buffer);
2919 Py_DECREF(reader);
2920 return NULL;
2921 }
2922
2923 /* Here we no longer need the original buffer since it contains
2924 * unicode. Make it point to the encoded bytes object.
2925 */
2926 Py_DECREF(buffer);
2927 buffer = temp;
2928 }
2929 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002930 Py_DECREF(buffer);
2931 break;
2932 }
2933
2934 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002935 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002936 );
2937
2938 Py_DECREF(buffer);
2939
2940 if (!res) {
2941 Py_DECREF(reader);
2942 return NULL;
2943 }
2944 Py_DECREF(res);
2945
2946 }
2947
2948 Py_DECREF(reader);
2949
2950 res = expat_parse(self, "", 0, 1);
2951
2952 if (res && TreeBuilder_CheckExact(self->target)) {
2953 Py_DECREF(res);
2954 return treebuilder_done((TreeBuilderObject*) self->target);
2955 }
2956
2957 return res;
2958}
2959
2960static PyObject*
2961xmlparser_setevents(XMLParserObject* self, PyObject* args)
2962{
2963 /* activate element event reporting */
2964
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002965 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002966 TreeBuilderObject* target;
2967
2968 PyObject* events; /* event collector */
2969 PyObject* event_set = Py_None;
2970 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2971 &event_set))
2972 return NULL;
2973
2974 if (!TreeBuilder_CheckExact(self->target)) {
2975 PyErr_SetString(
2976 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01002977 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002978 "targets"
2979 );
2980 return NULL;
2981 }
2982
2983 target = (TreeBuilderObject*) self->target;
2984
2985 Py_INCREF(events);
2986 Py_XDECREF(target->events);
2987 target->events = events;
2988
2989 /* clear out existing events */
2990 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2991 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2992 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2993 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2994
2995 if (event_set == Py_None) {
2996 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002997 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002998 Py_RETURN_NONE;
2999 }
3000
3001 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3002 goto error;
3003
3004 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3005 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3006 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003007 if (PyUnicode_Check(item)) {
3008 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003009 if (event == NULL)
3010 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003011 } else if (PyBytes_Check(item))
3012 event = PyBytes_AS_STRING(item);
3013 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003014 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003015 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016 if (strcmp(event, "start") == 0) {
3017 Py_INCREF(item);
3018 target->start_event_obj = item;
3019 } else if (strcmp(event, "end") == 0) {
3020 Py_INCREF(item);
3021 Py_XDECREF(target->end_event_obj);
3022 target->end_event_obj = item;
3023 } else if (strcmp(event, "start-ns") == 0) {
3024 Py_INCREF(item);
3025 Py_XDECREF(target->start_ns_event_obj);
3026 target->start_ns_event_obj = item;
3027 EXPAT(SetNamespaceDeclHandler)(
3028 self->parser,
3029 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3030 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3031 );
3032 } else if (strcmp(event, "end-ns") == 0) {
3033 Py_INCREF(item);
3034 Py_XDECREF(target->end_ns_event_obj);
3035 target->end_ns_event_obj = item;
3036 EXPAT(SetNamespaceDeclHandler)(
3037 self->parser,
3038 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3039 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3040 );
3041 } else {
3042 PyErr_Format(
3043 PyExc_ValueError,
3044 "unknown event '%s'", event
3045 );
3046 return NULL;
3047 }
3048 }
3049
3050 Py_RETURN_NONE;
3051
3052 error:
3053 PyErr_SetString(
3054 PyExc_TypeError,
3055 "invalid event tuple"
3056 );
3057 return NULL;
3058}
3059
3060static PyMethodDef xmlparser_methods[] = {
3061 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
3062 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3063 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3064 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
3065 {NULL, NULL}
3066};
3067
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003068static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003069xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003070{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003071 if (PyUnicode_Check(nameobj)) {
3072 PyObject* res;
3073 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3074 res = self->entity;
3075 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3076 res = self->target;
3077 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3078 return PyUnicode_FromFormat(
3079 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003080 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003081 }
3082 else
3083 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003084
Alexander Belopolskye239d232010-12-08 23:31:48 +00003085 Py_INCREF(res);
3086 return res;
3087 }
3088 generic:
3089 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003090}
3091
Neal Norwitz227b5332006-03-22 09:28:35 +00003092static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003093 PyVarObject_HEAD_INIT(NULL, 0)
3094 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003095 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003096 (destructor)xmlparser_dealloc, /* tp_dealloc */
3097 0, /* tp_print */
3098 0, /* tp_getattr */
3099 0, /* tp_setattr */
3100 0, /* tp_reserved */
3101 0, /* tp_repr */
3102 0, /* tp_as_number */
3103 0, /* tp_as_sequence */
3104 0, /* tp_as_mapping */
3105 0, /* tp_hash */
3106 0, /* tp_call */
3107 0, /* tp_str */
3108 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3109 0, /* tp_setattro */
3110 0, /* tp_as_buffer */
3111 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3112 /* tp_flags */
3113 0, /* tp_doc */
3114 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3115 (inquiry)xmlparser_gc_clear, /* tp_clear */
3116 0, /* tp_richcompare */
3117 0, /* tp_weaklistoffset */
3118 0, /* tp_iter */
3119 0, /* tp_iternext */
3120 xmlparser_methods, /* tp_methods */
3121 0, /* tp_members */
3122 0, /* tp_getset */
3123 0, /* tp_base */
3124 0, /* tp_dict */
3125 0, /* tp_descr_get */
3126 0, /* tp_descr_set */
3127 0, /* tp_dictoffset */
3128 (initproc)xmlparser_init, /* tp_init */
3129 PyType_GenericAlloc, /* tp_alloc */
3130 xmlparser_new, /* tp_new */
3131 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003132};
3133
3134#endif
3135
3136/* ==================================================================== */
3137/* python module interface */
3138
3139static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003140 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003141 {NULL, NULL}
3142};
3143
Martin v. Löwis1a214512008-06-11 05:26:20 +00003144
3145static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003146 PyModuleDef_HEAD_INIT,
3147 "_elementtree",
3148 NULL,
3149 -1,
3150 _functions,
3151 NULL,
3152 NULL,
3153 NULL,
3154 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003155};
3156
Neal Norwitzf6657e62006-12-28 04:47:50 +00003157PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003158PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003159{
Eli Bendersky828efde2012-04-05 05:40:58 +03003160 PyObject *m, *g, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003161 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003162
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003163 /* Initialize object types */
3164 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003165 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003166 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003167 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003168#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003169 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003170 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003171#endif
3172
Martin v. Löwis1a214512008-06-11 05:26:20 +00003173 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003174 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003175 return NULL;
3176
3177 /* The code below requires that the module gets already added
3178 to sys.modules. */
3179 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003180 _elementtreemodule.m_name,
3181 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003182
3183 /* python glue code */
3184
3185 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003186 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003187 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003188
3189 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
3190
3191 bootstrap = (
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003192 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003193 " if tag == '*':\n"
3194 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003195 " if tag is None or node.tag == tag:\n"
3196 " yield node\n"
3197 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003198 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003199 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003200
3201 "def itertext(node):\n" /* helper */
3202 " if node.text:\n"
3203 " yield node.text\n"
3204 " for e in node:\n"
3205 " for s in e.itertext():\n"
3206 " yield s\n"
3207 " if e.tail:\n"
3208 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003209
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003210 );
3211
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003212 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3213 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003214
Eli Bendersky828efde2012-04-05 05:40:58 +03003215 if (!(temp = PyImport_ImportModule("copy")))
3216 return NULL;
3217 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3218 Py_XDECREF(temp);
3219
3220 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3221 return NULL;
3222
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003223 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3224 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003225
3226#if defined(USE_PYEXPAT_CAPI)
3227 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003228 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3229 if (expat_capi) {
3230 /* check that it's usable */
3231 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3232 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3233 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3234 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003235 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003236 expat_capi = NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003237 }
3238 }
3239 if (!expat_capi) {
3240 PyErr_SetString(
3241 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
3242 );
3243 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003244 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003245#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003246
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003247 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003248 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003249 );
3250 Py_INCREF(elementtree_parseerror_obj);
3251 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3252
Eli Bendersky092af1f2012-03-04 07:14:03 +02003253 Py_INCREF((PyObject *)&Element_Type);
3254 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3255
Eli Bendersky58d548d2012-05-29 15:45:16 +03003256 Py_INCREF((PyObject *)&TreeBuilder_Type);
3257 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3258
Eli Bendersky52467b12012-06-01 07:13:08 +03003259#if defined(USE_EXPAT)
3260 Py_INCREF((PyObject *)&XMLParser_Type);
3261 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3262#endif
3263
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003264 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003265}