blob: 377e88e72138c94a75db3c62c3258c69cd8e778a [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
14#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030015#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000017/* -------------------------------------------------------------------- */
18/* configuration */
19
/* Number of child slots embedded directly in an element's "extra"
   block; an element holding no more than this many children needs no
   separate buffer for them.

   For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes).

   Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
#define STATIC_CHILDREN 4
33
34/* -------------------------------------------------------------------- */
35
/* Allocation tracing hooks.  Change the condition below to 1 to make
   every ALLOC/RELEASE print a running byte total together with the
   given comment; as shipped, both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
46
/* compiler tweaks: LOCAL(type) declares a file-private helper returning
   `type`; under MSVC it additionally requests inlining and the
   __fastcall calling convention. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
53
/* Macros used to store a 'join' flag in the lowest bit of a string
   object pointer.  Every use of text and tail as object pointers must
   go through JOIN_OBJ; see the comments in the ElementObject
   definition for more info.

     JOIN_GET(p)        -> the flag bit stored in p (0 or 1)
     JOIN_SET(p, flag)  -> p with its flag bit replaced by `flag`
     JOIN_OBJ(p)        -> p with the flag bit masked off, as PyObject*
*/
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061
62/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000063static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000064static PyObject* elementtree_deepcopy_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000065static PyObject* elementpath_obj;
66
67/* helpers */
68
69LOCAL(PyObject*)
70deepcopy(PyObject* object, PyObject* memo)
71{
72 /* do a deep copy of the given object */
73
74 PyObject* args;
75 PyObject* result;
76
77 if (!elementtree_deepcopy_obj) {
78 PyErr_SetString(
79 PyExc_RuntimeError,
80 "deepcopy helper not found"
81 );
82 return NULL;
83 }
84
Antoine Pitrouc1948842012-10-01 23:40:37 +020085 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000086 if (!args)
87 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000088 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000089 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000090 return result;
91}
92
93LOCAL(PyObject*)
94list_join(PyObject* list)
95{
96 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000097 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000098 PyObject* result;
99
Antoine Pitrouc1948842012-10-01 23:40:37 +0200100 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000101 if (!joiner)
102 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200103 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000104 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200105 if (result)
106 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000107 return result;
108}
109
Eli Bendersky48d358b2012-05-30 17:57:50 +0300110/* Is the given object an empty dictionary?
111*/
112static int
113is_empty_dict(PyObject *obj)
114{
115 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
116}
117
118
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000119/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200120/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000121
122typedef struct {
123
124 /* attributes (a dictionary object), or None if no attributes */
125 PyObject* attrib;
126
127 /* child elements */
128 int length; /* actual number of items */
129 int allocated; /* allocated items */
130
131 /* this either points to _children or to a malloced buffer */
132 PyObject* *children;
133
134 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100135
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136} ElementObjectExtra;
137
138typedef struct {
139 PyObject_HEAD
140
141 /* element tag (a string). */
142 PyObject* tag;
143
144 /* text before first child. note that this is a tagged pointer;
145 use JOIN_OBJ to get the object pointer. the join flag is used
146 to distinguish lists created by the tree builder from lists
147 assigned to the attribute by application code; the former
148 should be joined before being returned to the user, the latter
149 should be left intact. */
150 PyObject* text;
151
152 /* text after this element, in parent. note that this is a tagged
153 pointer; use JOIN_OBJ to get the object pointer. */
154 PyObject* tail;
155
156 ElementObjectExtra* extra;
157
Eli Benderskyebf37a22012-04-03 22:02:37 +0300158 PyObject *weakreflist; /* For tp_weaklistoffset */
159
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160} ElementObject;
161
Neal Norwitz227b5332006-03-22 09:28:35 +0000162static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000163
Christian Heimes90aa7642007-12-19 02:45:37 +0000164#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000165
166/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200167/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168
169LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200170create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000171{
172 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
173 if (!self->extra)
174 return -1;
175
176 if (!attrib)
177 attrib = Py_None;
178
179 Py_INCREF(attrib);
180 self->extra->attrib = attrib;
181
182 self->extra->length = 0;
183 self->extra->allocated = STATIC_CHILDREN;
184 self->extra->children = self->extra->_children;
185
186 return 0;
187}
188
189LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200190dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000191{
Eli Bendersky08b85292012-04-04 15:55:07 +0300192 ElementObjectExtra *myextra;
193 int i;
194
Eli Benderskyebf37a22012-04-03 22:02:37 +0300195 if (!self->extra)
196 return;
197
198 /* Avoid DECREFs calling into this code again (cycles, etc.)
199 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300200 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300201 self->extra = NULL;
202
203 Py_DECREF(myextra->attrib);
204
Eli Benderskyebf37a22012-04-03 22:02:37 +0300205 for (i = 0; i < myextra->length; i++)
206 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000207
Eli Benderskyebf37a22012-04-03 22:02:37 +0300208 if (myextra->children != myextra->_children)
209 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210
Eli Benderskyebf37a22012-04-03 22:02:37 +0300211 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212}
213
Eli Bendersky092af1f2012-03-04 07:14:03 +0200214/* Convenience internal function to create new Element objects with the given
215 * tag and attributes.
216*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000217LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200218create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000219{
220 ElementObject* self;
221
Eli Bendersky0192ba32012-03-30 16:38:33 +0300222 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000223 if (self == NULL)
224 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000225 self->extra = NULL;
226
Eli Bendersky48d358b2012-05-30 17:57:50 +0300227 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200228 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000229 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000231 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232 }
233
234 Py_INCREF(tag);
235 self->tag = tag;
236
237 Py_INCREF(Py_None);
238 self->text = Py_None;
239
240 Py_INCREF(Py_None);
241 self->tail = Py_None;
242
Eli Benderskyebf37a22012-04-03 22:02:37 +0300243 self->weakreflist = NULL;
244
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000245 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300246 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000247 return (PyObject*) self;
248}
249
Eli Bendersky092af1f2012-03-04 07:14:03 +0200250static PyObject *
251element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
252{
253 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
254 if (e != NULL) {
255 Py_INCREF(Py_None);
256 e->tag = Py_None;
257
258 Py_INCREF(Py_None);
259 e->text = Py_None;
260
261 Py_INCREF(Py_None);
262 e->tail = Py_None;
263
264 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200266 }
267 return (PyObject *)e;
268}
269
Eli Bendersky737b1732012-05-29 06:02:56 +0300270/* Helper function for extracting the attrib dictionary from a keywords dict.
271 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800272 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300273 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700274 *
275 * Return a dictionary with the content of kwds merged into the content of
276 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300277 */
278static PyObject*
279get_attrib_from_keywords(PyObject *kwds)
280{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700281 PyObject *attrib_str = PyUnicode_FromString("attrib");
282 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300283
284 if (attrib) {
285 /* If attrib was found in kwds, copy its value and remove it from
286 * kwds
287 */
288 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700289 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300290 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
291 Py_TYPE(attrib)->tp_name);
292 return NULL;
293 }
294 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700295 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300296 } else {
297 attrib = PyDict_New();
298 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700299
300 Py_DECREF(attrib_str);
301
302 /* attrib can be NULL if PyDict_New failed */
303 if (attrib)
304 PyDict_Update(attrib, kwds);
Eli Bendersky737b1732012-05-29 06:02:56 +0300305 return attrib;
306}
307
Eli Bendersky092af1f2012-03-04 07:14:03 +0200308static int
309element_init(PyObject *self, PyObject *args, PyObject *kwds)
310{
311 PyObject *tag;
312 PyObject *tmp;
313 PyObject *attrib = NULL;
314 ElementObject *self_elem;
315
316 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
317 return -1;
318
Eli Bendersky737b1732012-05-29 06:02:56 +0300319 if (attrib) {
320 /* attrib passed as positional arg */
321 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200322 if (!attrib)
323 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300324 if (kwds) {
325 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200326 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300327 return -1;
328 }
329 }
330 } else if (kwds) {
331 /* have keywords args */
332 attrib = get_attrib_from_keywords(kwds);
333 if (!attrib)
334 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200335 }
336
337 self_elem = (ElementObject *)self;
338
Antoine Pitrouc1948842012-10-01 23:40:37 +0200339 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200340 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200341 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200342 return -1;
343 }
344 }
345
Eli Bendersky48d358b2012-05-30 17:57:50 +0300346 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200347 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200348
349 /* Replace the objects already pointed to by tag, text and tail. */
350 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200351 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200352 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200353 Py_DECREF(tmp);
354
355 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200356 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200357 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200358 Py_DECREF(JOIN_OBJ(tmp));
359
360 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200361 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200362 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200363 Py_DECREF(JOIN_OBJ(tmp));
364
365 return 0;
366}
367
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000368LOCAL(int)
369element_resize(ElementObject* self, int extra)
370{
371 int size;
372 PyObject* *children;
373
374 /* make sure self->children can hold the given number of extra
375 elements. set an exception and return -1 if allocation failed */
376
377 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200378 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000379
380 size = self->extra->length + extra;
381
382 if (size > self->extra->allocated) {
383 /* use Python 2.4's list growth strategy */
384 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000385 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100386 * which needs at least 4 bytes.
387 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000388 * be safe.
389 */
390 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000391 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000392 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100393 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000394 * false alarm always assume at least one child to be safe.
395 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000396 children = PyObject_Realloc(self->extra->children,
397 size * sizeof(PyObject*));
398 if (!children)
399 goto nomemory;
400 } else {
401 children = PyObject_Malloc(size * sizeof(PyObject*));
402 if (!children)
403 goto nomemory;
404 /* copy existing children from static area to malloc buffer */
405 memcpy(children, self->extra->children,
406 self->extra->length * sizeof(PyObject*));
407 }
408 self->extra->children = children;
409 self->extra->allocated = size;
410 }
411
412 return 0;
413
414 nomemory:
415 PyErr_NoMemory();
416 return -1;
417}
418
419LOCAL(int)
420element_add_subelement(ElementObject* self, PyObject* element)
421{
422 /* add a child element to a parent */
423
424 if (element_resize(self, 1) < 0)
425 return -1;
426
427 Py_INCREF(element);
428 self->extra->children[self->extra->length] = element;
429
430 self->extra->length++;
431
432 return 0;
433}
434
435LOCAL(PyObject*)
436element_get_attrib(ElementObject* self)
437{
438 /* return borrowed reference to attrib dictionary */
439 /* note: this function assumes that the extra section exists */
440
441 PyObject* res = self->extra->attrib;
442
443 if (res == Py_None) {
444 /* create missing dictionary */
445 res = PyDict_New();
446 if (!res)
447 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200448 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000449 self->extra->attrib = res;
450 }
451
452 return res;
453}
454
455LOCAL(PyObject*)
456element_get_text(ElementObject* self)
457{
458 /* return borrowed reference to text attribute */
459
460 PyObject* res = self->text;
461
462 if (JOIN_GET(res)) {
463 res = JOIN_OBJ(res);
464 if (PyList_CheckExact(res)) {
465 res = list_join(res);
466 if (!res)
467 return NULL;
468 self->text = res;
469 }
470 }
471
472 return res;
473}
474
475LOCAL(PyObject*)
476element_get_tail(ElementObject* self)
477{
478 /* return borrowed reference to text attribute */
479
480 PyObject* res = self->tail;
481
482 if (JOIN_GET(res)) {
483 res = JOIN_OBJ(res);
484 if (PyList_CheckExact(res)) {
485 res = list_join(res);
486 if (!res)
487 return NULL;
488 self->tail = res;
489 }
490 }
491
492 return res;
493}
494
495static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300496subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000497{
498 PyObject* elem;
499
500 ElementObject* parent;
501 PyObject* tag;
502 PyObject* attrib = NULL;
503 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
504 &Element_Type, &parent, &tag,
505 &PyDict_Type, &attrib))
506 return NULL;
507
Eli Bendersky737b1732012-05-29 06:02:56 +0300508 if (attrib) {
509 /* attrib passed as positional arg */
510 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000511 if (!attrib)
512 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300513 if (kwds) {
514 if (PyDict_Update(attrib, kwds) < 0) {
515 return NULL;
516 }
517 }
518 } else if (kwds) {
519 /* have keyword args */
520 attrib = get_attrib_from_keywords(kwds);
521 if (!attrib)
522 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000523 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300524 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000525 Py_INCREF(Py_None);
526 attrib = Py_None;
527 }
528
Eli Bendersky092af1f2012-03-04 07:14:03 +0200529 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000530
531 Py_DECREF(attrib);
532
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000533 if (element_add_subelement(parent, elem) < 0) {
534 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000535 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000536 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000537
538 return elem;
539}
540
Eli Bendersky0192ba32012-03-30 16:38:33 +0300541static int
542element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
543{
544 Py_VISIT(self->tag);
545 Py_VISIT(JOIN_OBJ(self->text));
546 Py_VISIT(JOIN_OBJ(self->tail));
547
548 if (self->extra) {
549 int i;
550 Py_VISIT(self->extra->attrib);
551
552 for (i = 0; i < self->extra->length; ++i)
553 Py_VISIT(self->extra->children[i]);
554 }
555 return 0;
556}
557
558static int
559element_gc_clear(ElementObject *self)
560{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300561 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300562
563 /* The following is like Py_CLEAR for self->text and self->tail, but
564 * written explicitily because the real pointers hide behind access
565 * macros.
566 */
567 if (self->text) {
568 PyObject *tmp = JOIN_OBJ(self->text);
569 self->text = NULL;
570 Py_DECREF(tmp);
571 }
572
573 if (self->tail) {
574 PyObject *tmp = JOIN_OBJ(self->tail);
575 self->tail = NULL;
576 Py_DECREF(tmp);
577 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300578
579 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300580 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300581 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300582 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300583 return 0;
584}
585
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000586static void
587element_dealloc(ElementObject* self)
588{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300589 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300590
591 if (self->weakreflist != NULL)
592 PyObject_ClearWeakRefs((PyObject *) self);
593
Eli Bendersky0192ba32012-03-30 16:38:33 +0300594 /* element_gc_clear clears all references and deallocates extra
595 */
596 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000597
598 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200599 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000600}
601
602/* -------------------------------------------------------------------- */
603/* methods (in alphabetical order) */
604
605static PyObject*
606element_append(ElementObject* self, PyObject* args)
607{
608 PyObject* element;
609 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
610 return NULL;
611
612 if (element_add_subelement(self, element) < 0)
613 return NULL;
614
615 Py_RETURN_NONE;
616}
617
618static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300619element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000620{
621 if (!PyArg_ParseTuple(args, ":clear"))
622 return NULL;
623
Eli Benderskyebf37a22012-04-03 22:02:37 +0300624 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625
626 Py_INCREF(Py_None);
627 Py_DECREF(JOIN_OBJ(self->text));
628 self->text = Py_None;
629
630 Py_INCREF(Py_None);
631 Py_DECREF(JOIN_OBJ(self->tail));
632 self->tail = Py_None;
633
634 Py_RETURN_NONE;
635}
636
637static PyObject*
638element_copy(ElementObject* self, PyObject* args)
639{
640 int i;
641 ElementObject* element;
642
643 if (!PyArg_ParseTuple(args, ":__copy__"))
644 return NULL;
645
Eli Bendersky092af1f2012-03-04 07:14:03 +0200646 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000647 self->tag, (self->extra) ? self->extra->attrib : Py_None
648 );
649 if (!element)
650 return NULL;
651
652 Py_DECREF(JOIN_OBJ(element->text));
653 element->text = self->text;
654 Py_INCREF(JOIN_OBJ(element->text));
655
656 Py_DECREF(JOIN_OBJ(element->tail));
657 element->tail = self->tail;
658 Py_INCREF(JOIN_OBJ(element->tail));
659
660 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100661
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000662 if (element_resize(element, self->extra->length) < 0) {
663 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000664 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000665 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000666
667 for (i = 0; i < self->extra->length; i++) {
668 Py_INCREF(self->extra->children[i]);
669 element->extra->children[i] = self->extra->children[i];
670 }
671
672 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100673
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000674 }
675
676 return (PyObject*) element;
677}
678
679static PyObject*
680element_deepcopy(ElementObject* self, PyObject* args)
681{
682 int i;
683 ElementObject* element;
684 PyObject* tag;
685 PyObject* attrib;
686 PyObject* text;
687 PyObject* tail;
688 PyObject* id;
689
690 PyObject* memo;
691 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
692 return NULL;
693
694 tag = deepcopy(self->tag, memo);
695 if (!tag)
696 return NULL;
697
698 if (self->extra) {
699 attrib = deepcopy(self->extra->attrib, memo);
700 if (!attrib) {
701 Py_DECREF(tag);
702 return NULL;
703 }
704 } else {
705 Py_INCREF(Py_None);
706 attrib = Py_None;
707 }
708
Eli Bendersky092af1f2012-03-04 07:14:03 +0200709 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000710
711 Py_DECREF(tag);
712 Py_DECREF(attrib);
713
714 if (!element)
715 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100716
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000717 text = deepcopy(JOIN_OBJ(self->text), memo);
718 if (!text)
719 goto error;
720 Py_DECREF(element->text);
721 element->text = JOIN_SET(text, JOIN_GET(self->text));
722
723 tail = deepcopy(JOIN_OBJ(self->tail), memo);
724 if (!tail)
725 goto error;
726 Py_DECREF(element->tail);
727 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
728
729 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100730
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000731 if (element_resize(element, self->extra->length) < 0)
732 goto error;
733
734 for (i = 0; i < self->extra->length; i++) {
735 PyObject* child = deepcopy(self->extra->children[i], memo);
736 if (!child) {
737 element->extra->length = i;
738 goto error;
739 }
740 element->extra->children[i] = child;
741 }
742
743 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100744
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745 }
746
747 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200748 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000749 if (!id)
750 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000751
752 i = PyDict_SetItem(memo, id, (PyObject*) element);
753
754 Py_DECREF(id);
755
756 if (i < 0)
757 goto error;
758
759 return (PyObject*) element;
760
761 error:
762 Py_DECREF(element);
763 return NULL;
764}
765
Martin v. Löwisbce16662012-06-17 10:41:22 +0200766static PyObject*
767element_sizeof(PyObject* _self, PyObject* args)
768{
769 ElementObject *self = (ElementObject*)_self;
770 Py_ssize_t result = sizeof(ElementObject);
771 if (self->extra) {
772 result += sizeof(ElementObjectExtra);
773 if (self->extra->children != self->extra->_children)
774 result += sizeof(PyObject*) * self->extra->allocated;
775 }
776 return PyLong_FromSsize_t(result);
777}
778
/* Keys of the state dictionary exchanged by getstate/setstate. */
#define PICKLED_TAG      "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB   "attrib"
#define PICKLED_TAIL     "tail"
#define PICKLED_TEXT     "text"
785
786/* __getstate__ returns a fabricated instance dict as in the pure-Python
787 * Element implementation, for interoperability/interchangeability. This
788 * makes the pure-Python implementation details an API, but (a) there aren't
789 * any unnecessary structures there; and (b) it buys compatibility with 3.2
790 * pickles. See issue #16076.
791 */
792static PyObject *
793element_getstate(ElementObject *self)
794{
795 int i, noattrib;
796 PyObject *instancedict = NULL, *children;
797
798 /* Build a list of children. */
799 children = PyList_New(self->extra ? self->extra->length : 0);
800 if (!children)
801 return NULL;
802 for (i = 0; i < PyList_GET_SIZE(children); i++) {
803 PyObject *child = self->extra->children[i];
804 Py_INCREF(child);
805 PyList_SET_ITEM(children, i, child);
806 }
807
808 /* Construct the state object. */
809 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
810 if (noattrib)
811 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
812 PICKLED_TAG, self->tag,
813 PICKLED_CHILDREN, children,
814 PICKLED_ATTRIB,
815 PICKLED_TEXT, self->text,
816 PICKLED_TAIL, self->tail);
817 else
818 instancedict = Py_BuildValue("{sOsOsOsOsO}",
819 PICKLED_TAG, self->tag,
820 PICKLED_CHILDREN, children,
821 PICKLED_ATTRIB, self->extra->attrib,
822 PICKLED_TEXT, self->text,
823 PICKLED_TAIL, self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800824 if (instancedict) {
825 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800826 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800827 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800828 else {
829 for (i = 0; i < PyList_GET_SIZE(children); i++)
830 Py_DECREF(PyList_GET_ITEM(children, i));
831 Py_DECREF(children);
832
833 return NULL;
834 }
835}
836
837static PyObject *
838element_setstate_from_attributes(ElementObject *self,
839 PyObject *tag,
840 PyObject *attrib,
841 PyObject *text,
842 PyObject *tail,
843 PyObject *children)
844{
845 Py_ssize_t i, nchildren;
846
847 if (!tag) {
848 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
849 return NULL;
850 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800851
852 Py_CLEAR(self->tag);
853 self->tag = tag;
854 Py_INCREF(self->tag);
855
856 Py_CLEAR(self->text);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800857 self->text = text ? text : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800858 Py_INCREF(self->text);
859
860 Py_CLEAR(self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800861 self->tail = tail ? tail : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800862 Py_INCREF(self->tail);
863
864 /* Handle ATTRIB and CHILDREN. */
865 if (!children && !attrib)
866 Py_RETURN_NONE;
867
868 /* Compute 'nchildren'. */
869 if (children) {
870 if (!PyList_Check(children)) {
871 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
872 return NULL;
873 }
874 nchildren = PyList_Size(children);
875 }
876 else {
877 nchildren = 0;
878 }
879
880 /* Allocate 'extra'. */
881 if (element_resize(self, nchildren)) {
882 return NULL;
883 }
884 assert(self->extra && self->extra->allocated >= nchildren);
885
886 /* Copy children */
887 for (i = 0; i < nchildren; i++) {
888 self->extra->children[i] = PyList_GET_ITEM(children, i);
889 Py_INCREF(self->extra->children[i]);
890 }
891
892 self->extra->length = nchildren;
893 self->extra->allocated = nchildren;
894
895 /* Stash attrib. */
896 if (attrib) {
897 Py_CLEAR(self->extra->attrib);
898 self->extra->attrib = attrib;
899 Py_INCREF(attrib);
900 }
901
902 Py_RETURN_NONE;
903}
904
905/* __setstate__ for Element instance from the Python implementation.
906 * 'state' should be the instance dict.
907 */
908static PyObject *
909element_setstate_from_Python(ElementObject *self, PyObject *state)
910{
911 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
912 PICKLED_TAIL, PICKLED_CHILDREN, 0};
913 PyObject *args;
914 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800915 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800916
Eli Bendersky698bdb22013-01-10 06:01:06 -0800917 tag = attrib = text = tail = children = NULL;
918 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800919 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800920 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800921
922 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
923 &attrib, &text, &tail, &children))
924 retval = element_setstate_from_attributes(self, tag, attrib, text,
925 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800926 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800927 retval = NULL;
928
929 Py_DECREF(args);
930 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800931}
932
933static PyObject *
934element_setstate(ElementObject *self, PyObject *state)
935{
936 if (!PyDict_CheckExact(state)) {
937 PyErr_Format(PyExc_TypeError,
938 "Don't know how to unpickle \"%.200R\" as an Element",
939 state);
940 return NULL;
941 }
942 else
943 return element_setstate_from_Python(self, state);
944}
945
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000946LOCAL(int)
947checkpath(PyObject* tag)
948{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000949 Py_ssize_t i;
950 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000951
952 /* check if a tag contains an xpath character */
953
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000954#define PATHCHAR(ch) \
955 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000956
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000957 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200958 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
959 void *data = PyUnicode_DATA(tag);
960 unsigned int kind = PyUnicode_KIND(tag);
961 for (i = 0; i < len; i++) {
962 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
963 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000964 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200965 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000966 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200967 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000968 return 1;
969 }
970 return 0;
971 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000972 if (PyBytes_Check(tag)) {
973 char *p = PyBytes_AS_STRING(tag);
974 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000975 if (p[i] == '{')
976 check = 0;
977 else if (p[i] == '}')
978 check = 1;
979 else if (check && PATHCHAR(p[i]))
980 return 1;
981 }
982 return 0;
983 }
984
985 return 1; /* unknown type; might be path expression */
986}
987
988static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000989element_extend(ElementObject* self, PyObject* args)
990{
991 PyObject* seq;
992 Py_ssize_t i, seqlen = 0;
993
994 PyObject* seq_in;
995 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
996 return NULL;
997
998 seq = PySequence_Fast(seq_in, "");
999 if (!seq) {
1000 PyErr_Format(
1001 PyExc_TypeError,
1002 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1003 );
1004 return NULL;
1005 }
1006
1007 seqlen = PySequence_Size(seq);
1008 for (i = 0; i < seqlen; i++) {
1009 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001010 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1011 Py_DECREF(seq);
1012 PyErr_Format(
1013 PyExc_TypeError,
1014 "expected an Element, not \"%.200s\"",
1015 Py_TYPE(element)->tp_name);
1016 return NULL;
1017 }
1018
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001019 if (element_add_subelement(self, element) < 0) {
1020 Py_DECREF(seq);
1021 return NULL;
1022 }
1023 }
1024
1025 Py_DECREF(seq);
1026
1027 Py_RETURN_NONE;
1028}
1029
1030static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001031element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001032{
1033 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001034 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001035 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001036 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001037
Eli Bendersky737b1732012-05-29 06:02:56 +03001038 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1039 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001040 return NULL;
1041
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001042 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001043 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001044 return _PyObject_CallMethodId(
1045 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001046 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001047 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001048
1049 if (!self->extra)
1050 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001051
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001052 for (i = 0; i < self->extra->length; i++) {
1053 PyObject* item = self->extra->children[i];
1054 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001055 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001056 Py_INCREF(item);
1057 return item;
1058 }
1059 }
1060
1061 Py_RETURN_NONE;
1062}
1063
1064static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001065element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001066{
1067 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001068 PyObject* tag;
1069 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001070 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001071 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001072 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001073
Eli Bendersky737b1732012-05-29 06:02:56 +03001074 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1075 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001076 return NULL;
1077
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001078 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001079 return _PyObject_CallMethodId(
1080 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081 );
1082
1083 if (!self->extra) {
1084 Py_INCREF(default_value);
1085 return default_value;
1086 }
1087
1088 for (i = 0; i < self->extra->length; i++) {
1089 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +00001090 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
1091
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001092 PyObject* text = element_get_text(item);
1093 if (text == Py_None)
Eli Bendersky25771b32013-01-13 05:26:07 -08001094 return PyUnicode_New(0, 0);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001095 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001096 return text;
1097 }
1098 }
1099
1100 Py_INCREF(default_value);
1101 return default_value;
1102}
1103
1104static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001105element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001106{
1107 int i;
1108 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001109 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001110 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001111 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001112
Eli Bendersky737b1732012-05-29 06:02:56 +03001113 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1114 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001115 return NULL;
1116
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001117 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001118 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001119 return _PyObject_CallMethodId(
1120 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001121 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001122 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001123
1124 out = PyList_New(0);
1125 if (!out)
1126 return NULL;
1127
1128 if (!self->extra)
1129 return out;
1130
1131 for (i = 0; i < self->extra->length; i++) {
1132 PyObject* item = self->extra->children[i];
1133 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001134 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001135 if (PyList_Append(out, item) < 0) {
1136 Py_DECREF(out);
1137 return NULL;
1138 }
1139 }
1140 }
1141
1142 return out;
1143}
1144
1145static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001146element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001147{
1148 PyObject* tag;
1149 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001150 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001151 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001152
Eli Bendersky737b1732012-05-29 06:02:56 +03001153 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1154 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001155 return NULL;
1156
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001157 return _PyObject_CallMethodId(
1158 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001159 );
1160}
1161
1162static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001163element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001164{
1165 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001166 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001167
1168 PyObject* key;
1169 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001170
1171 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1172 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001173 return NULL;
1174
1175 if (!self->extra || self->extra->attrib == Py_None)
1176 value = default_value;
1177 else {
1178 value = PyDict_GetItem(self->extra->attrib, key);
1179 if (!value)
1180 value = default_value;
1181 }
1182
1183 Py_INCREF(value);
1184 return value;
1185}
1186
1187static PyObject*
1188element_getchildren(ElementObject* self, PyObject* args)
1189{
1190 int i;
1191 PyObject* list;
1192
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001193 /* FIXME: report as deprecated? */
1194
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001195 if (!PyArg_ParseTuple(args, ":getchildren"))
1196 return NULL;
1197
1198 if (!self->extra)
1199 return PyList_New(0);
1200
1201 list = PyList_New(self->extra->length);
1202 if (!list)
1203 return NULL;
1204
1205 for (i = 0; i < self->extra->length; i++) {
1206 PyObject* item = self->extra->children[i];
1207 Py_INCREF(item);
1208 PyList_SET_ITEM(list, i, item);
1209 }
1210
1211 return list;
1212}
1213
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001214
Eli Bendersky64d11e62012-06-15 07:42:50 +03001215static PyObject *
1216create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1217
1218
1219static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001220element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001221{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001222 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001223 static char* kwlist[] = {"tag", 0};
1224
1225 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226 return NULL;
1227
Eli Bendersky64d11e62012-06-15 07:42:50 +03001228 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001229}
1230
1231
1232static PyObject*
1233element_itertext(ElementObject* self, PyObject* args)
1234{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001235 if (!PyArg_ParseTuple(args, ":itertext"))
1236 return NULL;
1237
Eli Bendersky64d11e62012-06-15 07:42:50 +03001238 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001239}
1240
Eli Bendersky64d11e62012-06-15 07:42:50 +03001241
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001242static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001243element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001245 ElementObject* self = (ElementObject*) self_;
1246
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001247 if (!self->extra || index < 0 || index >= self->extra->length) {
1248 PyErr_SetString(
1249 PyExc_IndexError,
1250 "child index out of range"
1251 );
1252 return NULL;
1253 }
1254
1255 Py_INCREF(self->extra->children[index]);
1256 return self->extra->children[index];
1257}
1258
1259static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001260element_insert(ElementObject* self, PyObject* args)
1261{
1262 int i;
1263
1264 int index;
1265 PyObject* element;
1266 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1267 &Element_Type, &element))
1268 return NULL;
1269
1270 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001271 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001272
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001273 if (index < 0) {
1274 index += self->extra->length;
1275 if (index < 0)
1276 index = 0;
1277 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001278 if (index > self->extra->length)
1279 index = self->extra->length;
1280
1281 if (element_resize(self, 1) < 0)
1282 return NULL;
1283
1284 for (i = self->extra->length; i > index; i--)
1285 self->extra->children[i] = self->extra->children[i-1];
1286
1287 Py_INCREF(element);
1288 self->extra->children[index] = element;
1289
1290 self->extra->length++;
1291
1292 Py_RETURN_NONE;
1293}
1294
1295static PyObject*
1296element_items(ElementObject* self, PyObject* args)
1297{
1298 if (!PyArg_ParseTuple(args, ":items"))
1299 return NULL;
1300
1301 if (!self->extra || self->extra->attrib == Py_None)
1302 return PyList_New(0);
1303
1304 return PyDict_Items(self->extra->attrib);
1305}
1306
1307static PyObject*
1308element_keys(ElementObject* self, PyObject* args)
1309{
1310 if (!PyArg_ParseTuple(args, ":keys"))
1311 return NULL;
1312
1313 if (!self->extra || self->extra->attrib == Py_None)
1314 return PyList_New(0);
1315
1316 return PyDict_Keys(self->extra->attrib);
1317}
1318
Martin v. Löwis18e16552006-02-15 17:27:45 +00001319static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001320element_length(ElementObject* self)
1321{
1322 if (!self->extra)
1323 return 0;
1324
1325 return self->extra->length;
1326}
1327
1328static PyObject*
1329element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1330{
1331 PyObject* elem;
1332
1333 PyObject* tag;
1334 PyObject* attrib;
1335 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1336 return NULL;
1337
1338 attrib = PyDict_Copy(attrib);
1339 if (!attrib)
1340 return NULL;
1341
Eli Bendersky092af1f2012-03-04 07:14:03 +02001342 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001343
1344 Py_DECREF(attrib);
1345
1346 return elem;
1347}
1348
1349static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001350element_remove(ElementObject* self, PyObject* args)
1351{
1352 int i;
1353
1354 PyObject* element;
1355 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1356 return NULL;
1357
1358 if (!self->extra) {
1359 /* element has no children, so raise exception */
1360 PyErr_SetString(
1361 PyExc_ValueError,
1362 "list.remove(x): x not in list"
1363 );
1364 return NULL;
1365 }
1366
1367 for (i = 0; i < self->extra->length; i++) {
1368 if (self->extra->children[i] == element)
1369 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001370 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001371 break;
1372 }
1373
1374 if (i == self->extra->length) {
1375 /* element is not in children, so raise exception */
1376 PyErr_SetString(
1377 PyExc_ValueError,
1378 "list.remove(x): x not in list"
1379 );
1380 return NULL;
1381 }
1382
1383 Py_DECREF(self->extra->children[i]);
1384
1385 self->extra->length--;
1386
1387 for (; i < self->extra->length; i++)
1388 self->extra->children[i] = self->extra->children[i+1];
1389
1390 Py_RETURN_NONE;
1391}
1392
1393static PyObject*
1394element_repr(ElementObject* self)
1395{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001396 if (self->tag)
1397 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1398 else
1399 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001400}
1401
1402static PyObject*
1403element_set(ElementObject* self, PyObject* args)
1404{
1405 PyObject* attrib;
1406
1407 PyObject* key;
1408 PyObject* value;
1409 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1410 return NULL;
1411
1412 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001413 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001414
1415 attrib = element_get_attrib(self);
1416 if (!attrib)
1417 return NULL;
1418
1419 if (PyDict_SetItem(attrib, key, value) < 0)
1420 return NULL;
1421
1422 Py_RETURN_NONE;
1423}
1424
1425static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001426element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001427{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001428 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001429 int i;
1430 PyObject* old;
1431
1432 if (!self->extra || index < 0 || index >= self->extra->length) {
1433 PyErr_SetString(
1434 PyExc_IndexError,
1435 "child assignment index out of range");
1436 return -1;
1437 }
1438
1439 old = self->extra->children[index];
1440
1441 if (item) {
1442 Py_INCREF(item);
1443 self->extra->children[index] = item;
1444 } else {
1445 self->extra->length--;
1446 for (i = index; i < self->extra->length; i++)
1447 self->extra->children[i] = self->extra->children[i+1];
1448 }
1449
1450 Py_DECREF(old);
1451
1452 return 0;
1453}
1454
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001455static PyObject*
1456element_subscr(PyObject* self_, PyObject* item)
1457{
1458 ElementObject* self = (ElementObject*) self_;
1459
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001460 if (PyIndex_Check(item)) {
1461 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001462
1463 if (i == -1 && PyErr_Occurred()) {
1464 return NULL;
1465 }
1466 if (i < 0 && self->extra)
1467 i += self->extra->length;
1468 return element_getitem(self_, i);
1469 }
1470 else if (PySlice_Check(item)) {
1471 Py_ssize_t start, stop, step, slicelen, cur, i;
1472 PyObject* list;
1473
1474 if (!self->extra)
1475 return PyList_New(0);
1476
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001477 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001478 self->extra->length,
1479 &start, &stop, &step, &slicelen) < 0) {
1480 return NULL;
1481 }
1482
1483 if (slicelen <= 0)
1484 return PyList_New(0);
1485 else {
1486 list = PyList_New(slicelen);
1487 if (!list)
1488 return NULL;
1489
1490 for (cur = start, i = 0; i < slicelen;
1491 cur += step, i++) {
1492 PyObject* item = self->extra->children[cur];
1493 Py_INCREF(item);
1494 PyList_SET_ITEM(list, i, item);
1495 }
1496
1497 return list;
1498 }
1499 }
1500 else {
1501 PyErr_SetString(PyExc_TypeError,
1502 "element indices must be integers");
1503 return NULL;
1504 }
1505}
1506
1507static int
1508element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1509{
1510 ElementObject* self = (ElementObject*) self_;
1511
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001512 if (PyIndex_Check(item)) {
1513 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001514
1515 if (i == -1 && PyErr_Occurred()) {
1516 return -1;
1517 }
1518 if (i < 0 && self->extra)
1519 i += self->extra->length;
1520 return element_setitem(self_, i, value);
1521 }
1522 else if (PySlice_Check(item)) {
1523 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1524
1525 PyObject* recycle = NULL;
1526 PyObject* seq = NULL;
1527
1528 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001529 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001530
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001531 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001532 self->extra->length,
1533 &start, &stop, &step, &slicelen) < 0) {
1534 return -1;
1535 }
1536
Eli Bendersky865756a2012-03-09 13:38:15 +02001537 if (value == NULL) {
1538 /* Delete slice */
1539 size_t cur;
1540 Py_ssize_t i;
1541
1542 if (slicelen <= 0)
1543 return 0;
1544
1545 /* Since we're deleting, the direction of the range doesn't matter,
1546 * so for simplicity make it always ascending.
1547 */
1548 if (step < 0) {
1549 stop = start + 1;
1550 start = stop + step * (slicelen - 1) - 1;
1551 step = -step;
1552 }
1553
1554 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1555
1556 /* recycle is a list that will contain all the children
1557 * scheduled for removal.
1558 */
1559 if (!(recycle = PyList_New(slicelen))) {
1560 PyErr_NoMemory();
1561 return -1;
1562 }
1563
1564 /* This loop walks over all the children that have to be deleted,
1565 * with cur pointing at them. num_moved is the amount of children
1566 * until the next deleted child that have to be "shifted down" to
1567 * occupy the deleted's places.
1568 * Note that in the ith iteration, shifting is done i+i places down
1569 * because i children were already removed.
1570 */
1571 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1572 /* Compute how many children have to be moved, clipping at the
1573 * list end.
1574 */
1575 Py_ssize_t num_moved = step - 1;
1576 if (cur + step >= (size_t)self->extra->length) {
1577 num_moved = self->extra->length - cur - 1;
1578 }
1579
1580 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1581
1582 memmove(
1583 self->extra->children + cur - i,
1584 self->extra->children + cur + 1,
1585 num_moved * sizeof(PyObject *));
1586 }
1587
1588 /* Leftover "tail" after the last removed child */
1589 cur = start + (size_t)slicelen * step;
1590 if (cur < (size_t)self->extra->length) {
1591 memmove(
1592 self->extra->children + cur - slicelen,
1593 self->extra->children + cur,
1594 (self->extra->length - cur) * sizeof(PyObject *));
1595 }
1596
1597 self->extra->length -= slicelen;
1598
1599 /* Discard the recycle list with all the deleted sub-elements */
1600 Py_XDECREF(recycle);
1601 return 0;
1602 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001603 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001604 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001605 seq = PySequence_Fast(value, "");
1606 if (!seq) {
1607 PyErr_Format(
1608 PyExc_TypeError,
1609 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1610 );
1611 return -1;
1612 }
1613 newlen = PySequence_Size(seq);
1614 }
1615
1616 if (step != 1 && newlen != slicelen)
1617 {
1618 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001619 "attempt to assign sequence of size %zd "
1620 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001621 newlen, slicelen
1622 );
1623 return -1;
1624 }
1625
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001626 /* Resize before creating the recycle bin, to prevent refleaks. */
1627 if (newlen > slicelen) {
1628 if (element_resize(self, newlen - slicelen) < 0) {
1629 if (seq) {
1630 Py_DECREF(seq);
1631 }
1632 return -1;
1633 }
1634 }
1635
1636 if (slicelen > 0) {
1637 /* to avoid recursive calls to this method (via decref), move
1638 old items to the recycle bin here, and get rid of them when
1639 we're done modifying the element */
1640 recycle = PyList_New(slicelen);
1641 if (!recycle) {
1642 if (seq) {
1643 Py_DECREF(seq);
1644 }
1645 return -1;
1646 }
1647 for (cur = start, i = 0; i < slicelen;
1648 cur += step, i++)
1649 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1650 }
1651
1652 if (newlen < slicelen) {
1653 /* delete slice */
1654 for (i = stop; i < self->extra->length; i++)
1655 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1656 } else if (newlen > slicelen) {
1657 /* insert slice */
1658 for (i = self->extra->length-1; i >= stop; i--)
1659 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1660 }
1661
1662 /* replace the slice */
1663 for (cur = start, i = 0; i < newlen;
1664 cur += step, i++) {
1665 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1666 Py_INCREF(element);
1667 self->extra->children[cur] = element;
1668 }
1669
1670 self->extra->length += newlen - slicelen;
1671
1672 if (seq) {
1673 Py_DECREF(seq);
1674 }
1675
1676 /* discard the recycle bin, and everything in it */
1677 Py_XDECREF(recycle);
1678
1679 return 0;
1680 }
1681 else {
1682 PyErr_SetString(PyExc_TypeError,
1683 "element indices must be integers");
1684 return -1;
1685 }
1686}
1687
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001688static PyMethodDef element_methods[] = {
1689
Eli Bendersky0192ba32012-03-30 16:38:33 +03001690 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001691
Eli Benderskya8736902013-01-05 06:26:39 -08001692 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001693 {"set", (PyCFunction) element_set, METH_VARARGS},
1694
Eli Bendersky737b1732012-05-29 06:02:56 +03001695 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1696 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1697 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001698
1699 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001700 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001701 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1702 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1703
Eli Benderskya8736902013-01-05 06:26:39 -08001704 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001705 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001706 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001707
Eli Benderskya8736902013-01-05 06:26:39 -08001708 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001709 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1710
1711 {"items", (PyCFunction) element_items, METH_VARARGS},
1712 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1713
1714 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1715
1716 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1717 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001718 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001719 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1720 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001721
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001722 {NULL, NULL}
1723};
1724
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001725static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001726element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001727{
1728 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001729 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001730
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001731 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001732 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001733
Alexander Belopolskye239d232010-12-08 23:31:48 +00001734 if (name == NULL)
1735 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001736
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001737 /* handle common attributes first */
1738 if (strcmp(name, "tag") == 0) {
1739 res = self->tag;
1740 Py_INCREF(res);
1741 return res;
1742 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001743 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001744 Py_INCREF(res);
1745 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001746 }
1747
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001748 /* methods */
1749 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1750 if (res)
1751 return res;
1752
1753 /* less common attributes */
1754 if (strcmp(name, "tail") == 0) {
1755 PyErr_Clear();
1756 res = element_get_tail(self);
1757 } else if (strcmp(name, "attrib") == 0) {
1758 PyErr_Clear();
1759 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001760 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001761 res = element_get_attrib(self);
1762 }
1763
1764 if (!res)
1765 return NULL;
1766
1767 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001768 return res;
1769}
1770
Eli Benderskyef9683b2013-05-18 07:52:34 -07001771static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001772element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001773{
Eli Benderskyb20df952012-05-20 06:33:29 +03001774 char *name = "";
1775 if (PyUnicode_Check(nameobj))
1776 name = _PyUnicode_AsString(nameobj);
1777
Eli Benderskyef9683b2013-05-18 07:52:34 -07001778 if (name == NULL) {
1779 return -1;
1780 } else if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001781 Py_DECREF(self->tag);
1782 self->tag = value;
1783 Py_INCREF(self->tag);
1784 } else if (strcmp(name, "text") == 0) {
1785 Py_DECREF(JOIN_OBJ(self->text));
1786 self->text = value;
1787 Py_INCREF(self->text);
1788 } else if (strcmp(name, "tail") == 0) {
1789 Py_DECREF(JOIN_OBJ(self->tail));
1790 self->tail = value;
1791 Py_INCREF(self->tail);
1792 } else if (strcmp(name, "attrib") == 0) {
1793 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001794 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001795 Py_DECREF(self->extra->attrib);
1796 self->extra->attrib = value;
1797 Py_INCREF(self->extra->attrib);
1798 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001799 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001800 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001801 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001802 }
1803
Eli Benderskyef9683b2013-05-18 07:52:34 -07001804 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001805}
1806
1807static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001808 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001809 0, /* sq_concat */
1810 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001811 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001812 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001813 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001814 0,
1815};
1816
1817static PyMappingMethods element_as_mapping = {
1818 (lenfunc) element_length,
1819 (binaryfunc) element_subscr,
1820 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001821};
1822
Neal Norwitz227b5332006-03-22 09:28:35 +00001823static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001824 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001825 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001826 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001827 (destructor)element_dealloc, /* tp_dealloc */
1828 0, /* tp_print */
1829 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001830 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001831 0, /* tp_reserved */
1832 (reprfunc)element_repr, /* tp_repr */
1833 0, /* tp_as_number */
1834 &element_as_sequence, /* tp_as_sequence */
1835 &element_as_mapping, /* tp_as_mapping */
1836 0, /* tp_hash */
1837 0, /* tp_call */
1838 0, /* tp_str */
1839 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001840 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001841 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001842 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1843 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001844 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001845 (traverseproc)element_gc_traverse, /* tp_traverse */
1846 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001847 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001848 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001849 0, /* tp_iter */
1850 0, /* tp_iternext */
1851 element_methods, /* tp_methods */
1852 0, /* tp_members */
1853 0, /* tp_getset */
1854 0, /* tp_base */
1855 0, /* tp_dict */
1856 0, /* tp_descr_get */
1857 0, /* tp_descr_set */
1858 0, /* tp_dictoffset */
1859 (initproc)element_init, /* tp_init */
1860 PyType_GenericAlloc, /* tp_alloc */
1861 element_new, /* tp_new */
1862 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001863};
1864
Eli Bendersky64d11e62012-06-15 07:42:50 +03001865/******************************* Element iterator ****************************/
1866
1867/* ElementIterObject represents the iteration state over an XML element in
1868 * pre-order traversal. To keep track of which sub-element should be returned
1869 * next, a stack of parents is maintained. This is a standard stack-based
1870 * iterative pre-order traversal of a tree.
1871 * The stack is managed using a single-linked list starting at parent_stack.
1872 * Each stack node contains the saved parent to which we should return after
1873 * the current one is exhausted, and the next child to examine in that parent.
1874 */
1875typedef struct ParentLocator_t {
1876 ElementObject *parent;
1877 Py_ssize_t child_index;
1878 struct ParentLocator_t *next;
1879} ParentLocator;
1880
1881typedef struct {
1882 PyObject_HEAD
1883 ParentLocator *parent_stack;
1884 ElementObject *root_element;
1885 PyObject *sought_tag;
1886 int root_done;
1887 int gettext;
1888} ElementIterObject;
1889
1890
1891static void
1892elementiter_dealloc(ElementIterObject *it)
1893{
1894 ParentLocator *p = it->parent_stack;
1895 while (p) {
1896 ParentLocator *temp = p;
1897 Py_XDECREF(p->parent);
1898 p = p->next;
1899 PyObject_Free(temp);
1900 }
1901
1902 Py_XDECREF(it->sought_tag);
1903 Py_XDECREF(it->root_element);
1904
1905 PyObject_GC_UnTrack(it);
1906 PyObject_GC_Del(it);
1907}
1908
1909static int
1910elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1911{
1912 ParentLocator *p = it->parent_stack;
1913 while (p) {
1914 Py_VISIT(p->parent);
1915 p = p->next;
1916 }
1917
1918 Py_VISIT(it->root_element);
1919 Py_VISIT(it->sought_tag);
1920 return 0;
1921}
1922
1923/* Helper function for elementiter_next. Add a new parent to the parent stack.
1924 */
1925static ParentLocator *
1926parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1927{
1928 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1929 if (new_node) {
1930 new_node->parent = parent;
1931 Py_INCREF(parent);
1932 new_node->child_index = 0;
1933 new_node->next = stack;
1934 }
1935 return new_node;
1936}
1937
1938static PyObject *
1939elementiter_next(ElementIterObject *it)
1940{
1941 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08001942 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03001943 * A short note on gettext: this function serves both the iter() and
1944 * itertext() methods to avoid code duplication. However, there are a few
1945 * small differences in the way these iterations work. Namely:
1946 * - itertext() only yields text from nodes that have it, and continues
1947 * iterating when a node doesn't have text (so it doesn't return any
1948 * node like iter())
1949 * - itertext() also has to handle tail, after finishing with all the
1950 * children of a node.
1951 */
Eli Bendersky113da642012-06-15 07:52:49 +03001952 ElementObject *cur_parent;
1953 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001954
1955 while (1) {
1956 /* Handle the case reached in the beginning and end of iteration, where
1957 * the parent stack is empty. The root_done flag gives us indication
1958 * whether we've just started iterating (so root_done is 0), in which
1959 * case the root is returned. If root_done is 1 and we're here, the
1960 * iterator is exhausted.
1961 */
1962 if (!it->parent_stack->parent) {
1963 if (it->root_done) {
1964 PyErr_SetNone(PyExc_StopIteration);
1965 return NULL;
1966 } else {
1967 it->parent_stack = parent_stack_push_new(it->parent_stack,
1968 it->root_element);
1969 if (!it->parent_stack) {
1970 PyErr_NoMemory();
1971 return NULL;
1972 }
1973
1974 it->root_done = 1;
1975 if (it->sought_tag == Py_None ||
1976 PyObject_RichCompareBool(it->root_element->tag,
1977 it->sought_tag, Py_EQ) == 1) {
1978 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08001979 PyObject *text = element_get_text(it->root_element);
1980 if (!text)
1981 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001982 if (PyObject_IsTrue(text)) {
1983 Py_INCREF(text);
1984 return text;
1985 }
1986 } else {
1987 Py_INCREF(it->root_element);
1988 return (PyObject *)it->root_element;
1989 }
1990 }
1991 }
1992 }
1993
1994 /* See if there are children left to traverse in the current parent. If
1995 * yes, visit the next child. If not, pop the stack and try again.
1996 */
Eli Bendersky113da642012-06-15 07:52:49 +03001997 cur_parent = it->parent_stack->parent;
1998 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001999 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2000 ElementObject *child = (ElementObject *)
2001 cur_parent->extra->children[child_index];
2002 it->parent_stack->child_index++;
2003 it->parent_stack = parent_stack_push_new(it->parent_stack,
2004 child);
2005 if (!it->parent_stack) {
2006 PyErr_NoMemory();
2007 return NULL;
2008 }
2009
2010 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002011 PyObject *text = element_get_text(child);
2012 if (!text)
2013 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002014 if (PyObject_IsTrue(text)) {
2015 Py_INCREF(text);
2016 return text;
2017 }
2018 } else if (it->sought_tag == Py_None ||
2019 PyObject_RichCompareBool(child->tag,
2020 it->sought_tag, Py_EQ) == 1) {
2021 Py_INCREF(child);
2022 return (PyObject *)child;
2023 }
2024 else
2025 continue;
2026 }
2027 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002028 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002029 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002030 if (it->gettext) {
2031 tail = element_get_tail(cur_parent);
2032 if (!tail)
2033 return NULL;
2034 }
2035 else
2036 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002037 Py_XDECREF(it->parent_stack->parent);
2038 PyObject_Free(it->parent_stack);
2039 it->parent_stack = next;
2040
2041 /* Note that extra condition on it->parent_stack->parent here;
2042 * this is because itertext() is supposed to only return *inner*
2043 * text, not text following the element it began iteration with.
2044 */
2045 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2046 Py_INCREF(tail);
2047 return tail;
2048 }
2049 }
2050 }
2051
2052 return NULL;
2053}
2054
2055
2056static PyTypeObject ElementIter_Type = {
2057 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002058 /* Using the module's name since the pure-Python implementation does not
2059 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002060 "_elementtree._element_iterator", /* tp_name */
2061 sizeof(ElementIterObject), /* tp_basicsize */
2062 0, /* tp_itemsize */
2063 /* methods */
2064 (destructor)elementiter_dealloc, /* tp_dealloc */
2065 0, /* tp_print */
2066 0, /* tp_getattr */
2067 0, /* tp_setattr */
2068 0, /* tp_reserved */
2069 0, /* tp_repr */
2070 0, /* tp_as_number */
2071 0, /* tp_as_sequence */
2072 0, /* tp_as_mapping */
2073 0, /* tp_hash */
2074 0, /* tp_call */
2075 0, /* tp_str */
2076 0, /* tp_getattro */
2077 0, /* tp_setattro */
2078 0, /* tp_as_buffer */
2079 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2080 0, /* tp_doc */
2081 (traverseproc)elementiter_traverse, /* tp_traverse */
2082 0, /* tp_clear */
2083 0, /* tp_richcompare */
2084 0, /* tp_weaklistoffset */
2085 PyObject_SelfIter, /* tp_iter */
2086 (iternextfunc)elementiter_next, /* tp_iternext */
2087 0, /* tp_methods */
2088 0, /* tp_members */
2089 0, /* tp_getset */
2090 0, /* tp_base */
2091 0, /* tp_dict */
2092 0, /* tp_descr_get */
2093 0, /* tp_descr_set */
2094 0, /* tp_dictoffset */
2095 0, /* tp_init */
2096 0, /* tp_alloc */
2097 0, /* tp_new */
2098};
2099
2100
2101static PyObject *
2102create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2103{
2104 ElementIterObject *it;
2105 PyObject *star = NULL;
2106
2107 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2108 if (!it)
2109 return NULL;
2110 if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
2111 PyObject_GC_Del(it);
2112 return NULL;
2113 }
2114
2115 it->parent_stack->parent = NULL;
2116 it->parent_stack->child_index = 0;
2117 it->parent_stack->next = NULL;
2118
2119 if (PyUnicode_Check(tag))
2120 star = PyUnicode_FromString("*");
2121 else if (PyBytes_Check(tag))
2122 star = PyBytes_FromString("*");
2123
2124 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2125 tag = Py_None;
2126
2127 Py_XDECREF(star);
2128 it->sought_tag = tag;
2129 it->root_done = 0;
2130 it->gettext = gettext;
2131 it->root_element = self;
2132
2133 Py_INCREF(self);
2134 Py_INCREF(tag);
2135
2136 PyObject_GC_Track(it);
2137 return (PyObject *)it;
2138}
2139
2140
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002141/* ==================================================================== */
2142/* the tree builder type */
2143
2144typedef struct {
2145 PyObject_HEAD
2146
Eli Bendersky58d548d2012-05-29 15:45:16 +03002147 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002148
Antoine Pitrouee329312012-10-04 19:53:29 +02002149 PyObject *this; /* current node */
2150 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002151
Eli Bendersky58d548d2012-05-29 15:45:16 +03002152 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002153
Eli Bendersky58d548d2012-05-29 15:45:16 +03002154 PyObject *stack; /* element stack */
2155 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002156
Eli Bendersky48d358b2012-05-30 17:57:50 +03002157 PyObject *element_factory;
2158
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002159 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002160 PyObject *events; /* list of events, or NULL if not collecting */
2161 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2162 PyObject *end_event_obj;
2163 PyObject *start_ns_event_obj;
2164 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002165} TreeBuilderObject;
2166
Neal Norwitz227b5332006-03-22 09:28:35 +00002167static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002168
Christian Heimes90aa7642007-12-19 02:45:37 +00002169#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002170
2171/* -------------------------------------------------------------------- */
2172/* constructor and destructor */
2173
Eli Bendersky58d548d2012-05-29 15:45:16 +03002174static PyObject *
2175treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002176{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002177 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2178 if (t != NULL) {
2179 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002180
Eli Bendersky58d548d2012-05-29 15:45:16 +03002181 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002182 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002183 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002184 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002185
Eli Bendersky58d548d2012-05-29 15:45:16 +03002186 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002187 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002188 t->stack = PyList_New(20);
2189 if (!t->stack) {
2190 Py_DECREF(t->this);
2191 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002192 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002193 return NULL;
2194 }
2195 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002196
Eli Bendersky58d548d2012-05-29 15:45:16 +03002197 t->events = NULL;
2198 t->start_event_obj = t->end_event_obj = NULL;
2199 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2200 }
2201 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002202}
2203
Eli Bendersky58d548d2012-05-29 15:45:16 +03002204static int
2205treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002206{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002207 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002208 PyObject *element_factory = NULL;
2209 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002210 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002211
2212 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2213 &element_factory)) {
2214 return -1;
2215 }
2216
2217 if (element_factory) {
2218 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002219 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002220 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002221 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002222 }
2223
Eli Bendersky58d548d2012-05-29 15:45:16 +03002224 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002225}
2226
Eli Bendersky48d358b2012-05-30 17:57:50 +03002227static int
2228treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2229{
2230 Py_VISIT(self->root);
2231 Py_VISIT(self->this);
2232 Py_VISIT(self->last);
2233 Py_VISIT(self->data);
2234 Py_VISIT(self->stack);
2235 Py_VISIT(self->element_factory);
2236 return 0;
2237}
2238
2239static int
2240treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002241{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002242 Py_CLEAR(self->end_ns_event_obj);
2243 Py_CLEAR(self->start_ns_event_obj);
2244 Py_CLEAR(self->end_event_obj);
2245 Py_CLEAR(self->start_event_obj);
2246 Py_CLEAR(self->events);
2247 Py_CLEAR(self->stack);
2248 Py_CLEAR(self->data);
2249 Py_CLEAR(self->last);
2250 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002251 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002252 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002253 return 0;
2254}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002255
Eli Bendersky48d358b2012-05-30 17:57:50 +03002256static void
2257treebuilder_dealloc(TreeBuilderObject *self)
2258{
2259 PyObject_GC_UnTrack(self);
2260 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002261 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002262}
2263
2264/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002265/* helpers for handling of arbitrary element-like objects */
2266
2267static int
2268treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2269 PyObject **dest, _Py_Identifier *name)
2270{
2271 if (Element_CheckExact(element)) {
2272 Py_DECREF(JOIN_OBJ(*dest));
2273 *dest = JOIN_SET(data, PyList_CheckExact(data));
2274 return 0;
2275 }
2276 else {
2277 PyObject *joined = list_join(data);
2278 int r;
2279 if (joined == NULL)
2280 return -1;
2281 r = _PyObject_SetAttrId(element, name, joined);
2282 Py_DECREF(joined);
2283 return r;
2284 }
2285}
2286
2287/* These two functions steal a reference to data */
2288static int
2289treebuilder_set_element_text(PyObject *element, PyObject *data)
2290{
2291 _Py_IDENTIFIER(text);
2292 return treebuilder_set_element_text_or_tail(
2293 element, data, &((ElementObject *) element)->text, &PyId_text);
2294}
2295
2296static int
2297treebuilder_set_element_tail(PyObject *element, PyObject *data)
2298{
2299 _Py_IDENTIFIER(tail);
2300 return treebuilder_set_element_text_or_tail(
2301 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2302}
2303
2304static int
2305treebuilder_add_subelement(PyObject *element, PyObject *child)
2306{
2307 _Py_IDENTIFIER(append);
2308 if (Element_CheckExact(element)) {
2309 ElementObject *elem = (ElementObject *) element;
2310 return element_add_subelement(elem, child);
2311 }
2312 else {
2313 PyObject *res;
2314 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2315 if (res == NULL)
2316 return -1;
2317 Py_DECREF(res);
2318 return 0;
2319 }
2320}
2321
2322/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002323/* handlers */
2324
2325LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002326treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2327 PyObject* attrib)
2328{
2329 PyObject* node;
2330 PyObject* this;
2331
2332 if (self->data) {
2333 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002334 if (treebuilder_set_element_text(self->last, self->data))
2335 return NULL;
2336 }
2337 else {
2338 if (treebuilder_set_element_tail(self->last, self->data))
2339 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002340 }
2341 self->data = NULL;
2342 }
2343
Eli Bendersky08231a92013-05-18 15:47:16 -07002344 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002345 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2346 } else {
2347 node = create_new_element(tag, attrib);
2348 }
2349 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002350 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002351 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002352
Antoine Pitrouee329312012-10-04 19:53:29 +02002353 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002354
2355 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002356 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002357 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002358 } else {
2359 if (self->root) {
2360 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002361 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002362 "multiple elements on top level"
2363 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002364 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002365 }
2366 Py_INCREF(node);
2367 self->root = node;
2368 }
2369
2370 if (self->index < PyList_GET_SIZE(self->stack)) {
2371 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002372 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002373 Py_INCREF(this);
2374 } else {
2375 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002376 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002377 }
2378 self->index++;
2379
2380 Py_DECREF(this);
2381 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002382 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002383
2384 Py_DECREF(self->last);
2385 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002386 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002387
2388 if (self->start_event_obj) {
2389 PyObject* res;
2390 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002391 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002392 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002393 PyList_Append(self->events, res);
2394 Py_DECREF(res);
2395 } else
2396 PyErr_Clear(); /* FIXME: propagate error */
2397 }
2398
2399 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002400
2401 error:
2402 Py_DECREF(node);
2403 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002404}
2405
2406LOCAL(PyObject*)
2407treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2408{
2409 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002410 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002411 /* ignore calls to data before the first call to start */
2412 Py_RETURN_NONE;
2413 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002414 /* store the first item as is */
2415 Py_INCREF(data); self->data = data;
2416 } else {
2417 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002418 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2419 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002420 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002421 /* expat often generates single character data sections; handle
2422 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002423 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2424 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002425 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002426 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002427 } else if (PyList_CheckExact(self->data)) {
2428 if (PyList_Append(self->data, data) < 0)
2429 return NULL;
2430 } else {
2431 PyObject* list = PyList_New(2);
2432 if (!list)
2433 return NULL;
2434 PyList_SET_ITEM(list, 0, self->data);
2435 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2436 self->data = list;
2437 }
2438 }
2439
2440 Py_RETURN_NONE;
2441}
2442
2443LOCAL(PyObject*)
2444treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2445{
2446 PyObject* item;
2447
2448 if (self->data) {
2449 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002450 if (treebuilder_set_element_text(self->last, self->data))
2451 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002452 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002453 if (treebuilder_set_element_tail(self->last, self->data))
2454 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002455 }
2456 self->data = NULL;
2457 }
2458
2459 if (self->index == 0) {
2460 PyErr_SetString(
2461 PyExc_IndexError,
2462 "pop from empty stack"
2463 );
2464 return NULL;
2465 }
2466
2467 self->index--;
2468
2469 item = PyList_GET_ITEM(self->stack, self->index);
2470 Py_INCREF(item);
2471
2472 Py_DECREF(self->last);
2473
Antoine Pitrouee329312012-10-04 19:53:29 +02002474 self->last = self->this;
2475 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002476
2477 if (self->end_event_obj) {
2478 PyObject* res;
2479 PyObject* action = self->end_event_obj;
2480 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002481 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002482 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002483 PyList_Append(self->events, res);
2484 Py_DECREF(res);
2485 } else
2486 PyErr_Clear(); /* FIXME: propagate error */
2487 }
2488
2489 Py_INCREF(self->last);
2490 return (PyObject*) self->last;
2491}
2492
2493LOCAL(void)
2494treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002495 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002496{
2497 PyObject* res;
2498 PyObject* action;
2499 PyObject* parcel;
2500
2501 if (!self->events)
2502 return;
2503
2504 if (start) {
2505 if (!self->start_ns_event_obj)
2506 return;
2507 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002508 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002509 if (!parcel)
2510 return;
2511 Py_INCREF(action);
2512 } else {
2513 if (!self->end_ns_event_obj)
2514 return;
2515 action = self->end_ns_event_obj;
2516 Py_INCREF(action);
2517 parcel = Py_None;
2518 Py_INCREF(parcel);
2519 }
2520
2521 res = PyTuple_New(2);
2522
2523 if (res) {
2524 PyTuple_SET_ITEM(res, 0, action);
2525 PyTuple_SET_ITEM(res, 1, parcel);
2526 PyList_Append(self->events, res);
2527 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002528 }
2529 else {
2530 Py_DECREF(action);
2531 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002532 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002533 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002534}
2535
2536/* -------------------------------------------------------------------- */
2537/* methods (in alphabetical order) */
2538
2539static PyObject*
2540treebuilder_data(TreeBuilderObject* self, PyObject* args)
2541{
2542 PyObject* data;
2543 if (!PyArg_ParseTuple(args, "O:data", &data))
2544 return NULL;
2545
2546 return treebuilder_handle_data(self, data);
2547}
2548
2549static PyObject*
2550treebuilder_end(TreeBuilderObject* self, PyObject* args)
2551{
2552 PyObject* tag;
2553 if (!PyArg_ParseTuple(args, "O:end", &tag))
2554 return NULL;
2555
2556 return treebuilder_handle_end(self, tag);
2557}
2558
2559LOCAL(PyObject*)
2560treebuilder_done(TreeBuilderObject* self)
2561{
2562 PyObject* res;
2563
2564 /* FIXME: check stack size? */
2565
2566 if (self->root)
2567 res = self->root;
2568 else
2569 res = Py_None;
2570
2571 Py_INCREF(res);
2572 return res;
2573}
2574
2575static PyObject*
2576treebuilder_close(TreeBuilderObject* self, PyObject* args)
2577{
2578 if (!PyArg_ParseTuple(args, ":close"))
2579 return NULL;
2580
2581 return treebuilder_done(self);
2582}
2583
2584static PyObject*
2585treebuilder_start(TreeBuilderObject* self, PyObject* args)
2586{
2587 PyObject* tag;
2588 PyObject* attrib = Py_None;
2589 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2590 return NULL;
2591
2592 return treebuilder_handle_start(self, tag, attrib);
2593}
2594
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002595static PyMethodDef treebuilder_methods[] = {
2596 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2597 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2598 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002599 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2600 {NULL, NULL}
2601};
2602
Neal Norwitz227b5332006-03-22 09:28:35 +00002603static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002604 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002605 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002606 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002607 (destructor)treebuilder_dealloc, /* tp_dealloc */
2608 0, /* tp_print */
2609 0, /* tp_getattr */
2610 0, /* tp_setattr */
2611 0, /* tp_reserved */
2612 0, /* tp_repr */
2613 0, /* tp_as_number */
2614 0, /* tp_as_sequence */
2615 0, /* tp_as_mapping */
2616 0, /* tp_hash */
2617 0, /* tp_call */
2618 0, /* tp_str */
2619 0, /* tp_getattro */
2620 0, /* tp_setattro */
2621 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002622 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2623 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002624 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002625 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2626 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002627 0, /* tp_richcompare */
2628 0, /* tp_weaklistoffset */
2629 0, /* tp_iter */
2630 0, /* tp_iternext */
2631 treebuilder_methods, /* tp_methods */
2632 0, /* tp_members */
2633 0, /* tp_getset */
2634 0, /* tp_base */
2635 0, /* tp_dict */
2636 0, /* tp_descr_get */
2637 0, /* tp_descr_set */
2638 0, /* tp_dictoffset */
2639 (initproc)treebuilder_init, /* tp_init */
2640 PyType_GenericAlloc, /* tp_alloc */
2641 treebuilder_new, /* tp_new */
2642 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002643};
2644
2645/* ==================================================================== */
2646/* the expat interface */
2647
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002648#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002649#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002650static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002651#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002652
Eli Bendersky52467b12012-06-01 07:13:08 +03002653static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2654 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2655
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002656typedef struct {
2657 PyObject_HEAD
2658
2659 XML_Parser parser;
2660
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002661 PyObject *target;
2662 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002664 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002665
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002666 PyObject *handle_start;
2667 PyObject *handle_data;
2668 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002669
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002670 PyObject *handle_comment;
2671 PyObject *handle_pi;
2672 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002673
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002674 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002675
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002676} XMLParserObject;
2677
Neal Norwitz227b5332006-03-22 09:28:35 +00002678static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002679
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002680#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2681
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002682/* helpers */
2683
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002684LOCAL(PyObject*)
2685makeuniversal(XMLParserObject* self, const char* string)
2686{
2687 /* convert a UTF-8 tag/attribute name from the expat parser
2688 to a universal name string */
2689
Antoine Pitrouc1948842012-10-01 23:40:37 +02002690 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002691 PyObject* key;
2692 PyObject* value;
2693
2694 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002695 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002696 if (!key)
2697 return NULL;
2698
2699 value = PyDict_GetItem(self->names, key);
2700
2701 if (value) {
2702 Py_INCREF(value);
2703 } else {
2704 /* new name. convert to universal name, and decode as
2705 necessary */
2706
2707 PyObject* tag;
2708 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002709 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710
2711 /* look for namespace separator */
2712 for (i = 0; i < size; i++)
2713 if (string[i] == '}')
2714 break;
2715 if (i != size) {
2716 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002717 tag = PyBytes_FromStringAndSize(NULL, size+1);
2718 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002719 p[0] = '{';
2720 memcpy(p+1, string, size);
2721 size++;
2722 } else {
2723 /* plain name; use key as tag */
2724 Py_INCREF(key);
2725 tag = key;
2726 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002727
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002728 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002729 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002730 value = PyUnicode_DecodeUTF8(p, size, "strict");
2731 Py_DECREF(tag);
2732 if (!value) {
2733 Py_DECREF(key);
2734 return NULL;
2735 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736
2737 /* add to names dictionary */
2738 if (PyDict_SetItem(self->names, key, value) < 0) {
2739 Py_DECREF(key);
2740 Py_DECREF(value);
2741 return NULL;
2742 }
2743 }
2744
2745 Py_DECREF(key);
2746 return value;
2747}
2748
Eli Bendersky5b77d812012-03-16 08:20:05 +02002749/* Set the ParseError exception with the given parameters.
2750 * If message is not NULL, it's used as the error string. Otherwise, the
2751 * message string is the default for the given error_code.
2752*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002753static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002754expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002755{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002756 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002757
Victor Stinner499dfcf2011-03-21 13:26:24 +01002758 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002759 message ? message : EXPAT(ErrorString)(error_code),
2760 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002761 if (errmsg == NULL)
2762 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002763
Victor Stinner499dfcf2011-03-21 13:26:24 +01002764 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2765 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002766 if (!error)
2767 return;
2768
Eli Bendersky5b77d812012-03-16 08:20:05 +02002769 /* Add code and position attributes */
2770 code = PyLong_FromLong((long)error_code);
2771 if (!code) {
2772 Py_DECREF(error);
2773 return;
2774 }
2775 if (PyObject_SetAttrString(error, "code", code) == -1) {
2776 Py_DECREF(error);
2777 Py_DECREF(code);
2778 return;
2779 }
2780 Py_DECREF(code);
2781
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002782 position = Py_BuildValue("(ii)", line, column);
2783 if (!position) {
2784 Py_DECREF(error);
2785 return;
2786 }
2787 if (PyObject_SetAttrString(error, "position", position) == -1) {
2788 Py_DECREF(error);
2789 Py_DECREF(position);
2790 return;
2791 }
2792 Py_DECREF(position);
2793
2794 PyErr_SetObject(elementtree_parseerror_obj, error);
2795 Py_DECREF(error);
2796}
2797
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002798/* -------------------------------------------------------------------- */
2799/* handlers */
2800
2801static void
2802expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2803 int data_len)
2804{
2805 PyObject* key;
2806 PyObject* value;
2807 PyObject* res;
2808
2809 if (data_len < 2 || data_in[0] != '&')
2810 return;
2811
Neal Norwitz0269b912007-08-08 06:56:02 +00002812 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002813 if (!key)
2814 return;
2815
2816 value = PyDict_GetItem(self->entity, key);
2817
2818 if (value) {
2819 if (TreeBuilder_CheckExact(self->target))
2820 res = treebuilder_handle_data(
2821 (TreeBuilderObject*) self->target, value
2822 );
2823 else if (self->handle_data)
2824 res = PyObject_CallFunction(self->handle_data, "O", value);
2825 else
2826 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002827 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002828 } else if (!PyErr_Occurred()) {
2829 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002830 char message[128] = "undefined entity ";
2831 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002832 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002833 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002834 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002835 EXPAT(GetErrorColumnNumber)(self->parser),
2836 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002837 );
2838 }
2839
2840 Py_DECREF(key);
2841}
2842
2843static void
2844expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2845 const XML_Char **attrib_in)
2846{
2847 PyObject* res;
2848 PyObject* tag;
2849 PyObject* attrib;
2850 int ok;
2851
2852 /* tag name */
2853 tag = makeuniversal(self, tag_in);
2854 if (!tag)
2855 return; /* parser will look for errors */
2856
2857 /* attributes */
2858 if (attrib_in[0]) {
2859 attrib = PyDict_New();
2860 if (!attrib)
2861 return;
2862 while (attrib_in[0] && attrib_in[1]) {
2863 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002864 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002865 if (!key || !value) {
2866 Py_XDECREF(value);
2867 Py_XDECREF(key);
2868 Py_DECREF(attrib);
2869 return;
2870 }
2871 ok = PyDict_SetItem(attrib, key, value);
2872 Py_DECREF(value);
2873 Py_DECREF(key);
2874 if (ok < 0) {
2875 Py_DECREF(attrib);
2876 return;
2877 }
2878 attrib_in += 2;
2879 }
2880 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002881 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002882 attrib = PyDict_New();
2883 if (!attrib)
2884 return;
2885 }
2886
2887 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002888 /* shortcut */
2889 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2890 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002891 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002892 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002893 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002894 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002895 res = NULL;
2896
2897 Py_DECREF(tag);
2898 Py_DECREF(attrib);
2899
2900 Py_XDECREF(res);
2901}
2902
2903static void
2904expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2905 int data_len)
2906{
2907 PyObject* data;
2908 PyObject* res;
2909
Neal Norwitz0269b912007-08-08 06:56:02 +00002910 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002911 if (!data)
2912 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002913
2914 if (TreeBuilder_CheckExact(self->target))
2915 /* shortcut */
2916 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2917 else if (self->handle_data)
2918 res = PyObject_CallFunction(self->handle_data, "O", data);
2919 else
2920 res = NULL;
2921
2922 Py_DECREF(data);
2923
2924 Py_XDECREF(res);
2925}
2926
2927static void
2928expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2929{
2930 PyObject* tag;
2931 PyObject* res = NULL;
2932
2933 if (TreeBuilder_CheckExact(self->target))
2934 /* shortcut */
2935 /* the standard tree builder doesn't look at the end tag */
2936 res = treebuilder_handle_end(
2937 (TreeBuilderObject*) self->target, Py_None
2938 );
2939 else if (self->handle_end) {
2940 tag = makeuniversal(self, tag_in);
2941 if (tag) {
2942 res = PyObject_CallFunction(self->handle_end, "O", tag);
2943 Py_DECREF(tag);
2944 }
2945 }
2946
2947 Py_XDECREF(res);
2948}
2949
2950static void
2951expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2952 const XML_Char *uri)
2953{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002954 PyObject* sprefix = NULL;
2955 PyObject* suri = NULL;
2956
2957 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2958 if (!suri)
2959 return;
2960
2961 if (prefix)
2962 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2963 else
2964 sprefix = PyUnicode_FromString("");
2965 if (!sprefix) {
2966 Py_DECREF(suri);
2967 return;
2968 }
2969
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002971 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002972 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002973
2974 Py_DECREF(sprefix);
2975 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976}
2977
2978static void
2979expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2980{
2981 treebuilder_handle_namespace(
2982 (TreeBuilderObject*) self->target, 0, NULL, NULL
2983 );
2984}
2985
2986static void
2987expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2988{
2989 PyObject* comment;
2990 PyObject* res;
2991
2992 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002993 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002994 if (comment) {
2995 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2996 Py_XDECREF(res);
2997 Py_DECREF(comment);
2998 }
2999 }
3000}
3001
Eli Bendersky45839902013-01-13 05:14:47 -08003002static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003003expat_start_doctype_handler(XMLParserObject *self,
3004 const XML_Char *doctype_name,
3005 const XML_Char *sysid,
3006 const XML_Char *pubid,
3007 int has_internal_subset)
3008{
3009 PyObject *self_pyobj = (PyObject *)self;
3010 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3011 PyObject *parser_doctype = NULL;
3012 PyObject *res = NULL;
3013
3014 doctype_name_obj = makeuniversal(self, doctype_name);
3015 if (!doctype_name_obj)
3016 return;
3017
3018 if (sysid) {
3019 sysid_obj = makeuniversal(self, sysid);
3020 if (!sysid_obj) {
3021 Py_DECREF(doctype_name_obj);
3022 return;
3023 }
3024 } else {
3025 Py_INCREF(Py_None);
3026 sysid_obj = Py_None;
3027 }
3028
3029 if (pubid) {
3030 pubid_obj = makeuniversal(self, pubid);
3031 if (!pubid_obj) {
3032 Py_DECREF(doctype_name_obj);
3033 Py_DECREF(sysid_obj);
3034 return;
3035 }
3036 } else {
3037 Py_INCREF(Py_None);
3038 pubid_obj = Py_None;
3039 }
3040
3041 /* If the target has a handler for doctype, call it. */
3042 if (self->handle_doctype) {
3043 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3044 doctype_name_obj, pubid_obj, sysid_obj);
3045 Py_CLEAR(res);
3046 }
3047
3048 /* Now see if the parser itself has a doctype method. If yes and it's
3049 * a subclass, call it but warn about deprecation. If it's not a subclass
3050 * (i.e. vanilla XMLParser), do nothing.
3051 */
3052 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3053 if (parser_doctype) {
3054 if (!XMLParser_CheckExact(self_pyobj)) {
3055 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3056 "This method of XMLParser is deprecated. Define"
3057 " doctype() method on the TreeBuilder target.",
3058 1) < 0) {
3059 goto clear;
3060 }
3061 res = PyObject_CallFunction(parser_doctype, "OOO",
3062 doctype_name_obj, pubid_obj, sysid_obj);
3063 Py_CLEAR(res);
3064 }
3065 }
3066
3067clear:
3068 Py_XDECREF(parser_doctype);
3069 Py_DECREF(doctype_name_obj);
3070 Py_DECREF(pubid_obj);
3071 Py_DECREF(sysid_obj);
3072}
3073
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003074static void
3075expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3076 const XML_Char* data_in)
3077{
3078 PyObject* target;
3079 PyObject* data;
3080 PyObject* res;
3081
3082 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003083 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3084 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003085 if (target && data) {
3086 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3087 Py_XDECREF(res);
3088 Py_DECREF(data);
3089 Py_DECREF(target);
3090 } else {
3091 Py_XDECREF(data);
3092 Py_XDECREF(target);
3093 }
3094 }
3095}
3096
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003097/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003098
Eli Bendersky52467b12012-06-01 07:13:08 +03003099static PyObject *
3100xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003101{
Eli Bendersky52467b12012-06-01 07:13:08 +03003102 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3103 if (self) {
3104 self->parser = NULL;
3105 self->target = self->entity = self->names = NULL;
3106 self->handle_start = self->handle_data = self->handle_end = NULL;
3107 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003108 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003109 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003110 return (PyObject *)self;
3111}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003112
Eli Bendersky52467b12012-06-01 07:13:08 +03003113static int
3114xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3115{
3116 XMLParserObject *self_xp = (XMLParserObject *)self;
3117 PyObject *target = NULL, *html = NULL;
3118 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003119 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003120
Eli Bendersky52467b12012-06-01 07:13:08 +03003121 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3122 &html, &target, &encoding)) {
3123 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003124 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003125
Eli Bendersky52467b12012-06-01 07:13:08 +03003126 self_xp->entity = PyDict_New();
3127 if (!self_xp->entity)
3128 return -1;
3129
3130 self_xp->names = PyDict_New();
3131 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003132 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003133 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003134 }
3135
Eli Bendersky52467b12012-06-01 07:13:08 +03003136 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3137 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003138 Py_CLEAR(self_xp->entity);
3139 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003140 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003141 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003142 }
3143
Eli Bendersky52467b12012-06-01 07:13:08 +03003144 if (target) {
3145 Py_INCREF(target);
3146 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003147 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003148 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003149 Py_CLEAR(self_xp->entity);
3150 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003151 EXPAT(ParserFree)(self_xp->parser);
3152 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003153 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003154 }
3155 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003156
Eli Bendersky52467b12012-06-01 07:13:08 +03003157 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3158 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3159 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3160 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3161 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3162 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003163 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003164
3165 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003166
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003167 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003168 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003169 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003170 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003171 (XML_StartElementHandler) expat_start_handler,
3172 (XML_EndElementHandler) expat_end_handler
3173 );
3174 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003175 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003176 (XML_DefaultHandler) expat_default_handler
3177 );
3178 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003179 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003180 (XML_CharacterDataHandler) expat_data_handler
3181 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003182 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003183 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003184 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003185 (XML_CommentHandler) expat_comment_handler
3186 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003187 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003188 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003189 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003190 (XML_ProcessingInstructionHandler) expat_pi_handler
3191 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003192 EXPAT(SetStartDoctypeDeclHandler)(
3193 self_xp->parser,
3194 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3195 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003196 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003197 self_xp->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003198 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003199 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003200
Eli Bendersky52467b12012-06-01 07:13:08 +03003201 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003202}
3203
Eli Bendersky52467b12012-06-01 07:13:08 +03003204static int
3205xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3206{
3207 Py_VISIT(self->handle_close);
3208 Py_VISIT(self->handle_pi);
3209 Py_VISIT(self->handle_comment);
3210 Py_VISIT(self->handle_end);
3211 Py_VISIT(self->handle_data);
3212 Py_VISIT(self->handle_start);
3213
3214 Py_VISIT(self->target);
3215 Py_VISIT(self->entity);
3216 Py_VISIT(self->names);
3217
3218 return 0;
3219}
3220
3221static int
3222xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003223{
3224 EXPAT(ParserFree)(self->parser);
3225
Antoine Pitrouc1948842012-10-01 23:40:37 +02003226 Py_CLEAR(self->handle_close);
3227 Py_CLEAR(self->handle_pi);
3228 Py_CLEAR(self->handle_comment);
3229 Py_CLEAR(self->handle_end);
3230 Py_CLEAR(self->handle_data);
3231 Py_CLEAR(self->handle_start);
3232 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003233
Antoine Pitrouc1948842012-10-01 23:40:37 +02003234 Py_CLEAR(self->target);
3235 Py_CLEAR(self->entity);
3236 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003237
Eli Bendersky52467b12012-06-01 07:13:08 +03003238 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003239}
3240
Eli Bendersky52467b12012-06-01 07:13:08 +03003241static void
3242xmlparser_dealloc(XMLParserObject* self)
3243{
3244 PyObject_GC_UnTrack(self);
3245 xmlparser_gc_clear(self);
3246 Py_TYPE(self)->tp_free((PyObject *)self);
3247}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003248
3249LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003250expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003251{
3252 int ok;
3253
3254 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3255
3256 if (PyErr_Occurred())
3257 return NULL;
3258
3259 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003260 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003261 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003262 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003263 EXPAT(GetErrorColumnNumber)(self->parser),
3264 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003265 );
3266 return NULL;
3267 }
3268
3269 Py_RETURN_NONE;
3270}
3271
3272static PyObject*
3273xmlparser_close(XMLParserObject* self, PyObject* args)
3274{
3275 /* end feeding data to parser */
3276
3277 PyObject* res;
3278 if (!PyArg_ParseTuple(args, ":close"))
3279 return NULL;
3280
3281 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003282 if (!res)
3283 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003285 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286 Py_DECREF(res);
3287 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003288 } if (self->handle_close) {
3289 Py_DECREF(res);
3290 return PyObject_CallFunction(self->handle_close, "");
3291 } else
3292 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003293}
3294
3295static PyObject*
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003296xmlparser_feed(XMLParserObject* self, PyObject* arg)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003297{
3298 /* feed data to parser */
3299
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003300 if (PyUnicode_Check(arg)) {
3301 Py_ssize_t data_len;
3302 const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len);
3303 if (data == NULL)
3304 return NULL;
3305 if (data_len > INT_MAX) {
3306 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3307 return NULL;
3308 }
3309 /* Explicitly set UTF-8 encoding. Return code ignored. */
3310 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3311 return expat_parse(self, data, (int)data_len, 0);
3312 }
3313 else {
3314 Py_buffer view;
3315 PyObject *res;
3316 if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0)
3317 return NULL;
3318 if (view.len > INT_MAX) {
3319 PyBuffer_Release(&view);
3320 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3321 return NULL;
3322 }
3323 res = expat_parse(self, view.buf, (int)view.len, 0);
3324 PyBuffer_Release(&view);
3325 return res;
3326 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003327}
3328
3329static PyObject*
Eli Benderskya3699232013-05-19 18:47:23 -07003330xmlparser_parse_whole(XMLParserObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003331{
Eli Benderskya3699232013-05-19 18:47:23 -07003332 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003333 PyObject* reader;
3334 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003335 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003336 PyObject* res;
3337
3338 PyObject* fileobj;
3339 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3340 return NULL;
3341
3342 reader = PyObject_GetAttrString(fileobj, "read");
3343 if (!reader)
3344 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003345
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346 /* read from open file object */
3347 for (;;) {
3348
3349 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3350
3351 if (!buffer) {
3352 /* read failed (e.g. due to KeyboardInterrupt) */
3353 Py_DECREF(reader);
3354 return NULL;
3355 }
3356
Eli Benderskyf996e772012-03-16 05:53:30 +02003357 if (PyUnicode_CheckExact(buffer)) {
3358 /* A unicode object is encoded into bytes using UTF-8 */
3359 if (PyUnicode_GET_SIZE(buffer) == 0) {
3360 Py_DECREF(buffer);
3361 break;
3362 }
3363 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003364 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003365 if (!temp) {
3366 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003367 Py_DECREF(reader);
3368 return NULL;
3369 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003370 buffer = temp;
3371 }
3372 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003373 Py_DECREF(buffer);
3374 break;
3375 }
3376
3377 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003378 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003379 );
3380
3381 Py_DECREF(buffer);
3382
3383 if (!res) {
3384 Py_DECREF(reader);
3385 return NULL;
3386 }
3387 Py_DECREF(res);
3388
3389 }
3390
3391 Py_DECREF(reader);
3392
3393 res = expat_parse(self, "", 0, 1);
3394
3395 if (res && TreeBuilder_CheckExact(self->target)) {
3396 Py_DECREF(res);
3397 return treebuilder_done((TreeBuilderObject*) self->target);
3398 }
3399
3400 return res;
3401}
3402
3403static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003404xmlparser_doctype(XMLParserObject *self, PyObject *args)
3405{
3406 Py_RETURN_NONE;
3407}
3408
3409static PyObject*
3410xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003411{
3412 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003413 Py_ssize_t i, seqlen;
3414 TreeBuilderObject *target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003415
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003416 PyObject *events_queue;
3417 PyObject *events_to_report = Py_None;
3418 PyObject *events_seq;
3419 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events_queue,
3420 &events_to_report))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003421 return NULL;
3422
3423 if (!TreeBuilder_CheckExact(self->target)) {
3424 PyErr_SetString(
3425 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003426 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003427 "targets"
3428 );
3429 return NULL;
3430 }
3431
3432 target = (TreeBuilderObject*) self->target;
3433
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003434 Py_INCREF(events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003435 Py_XDECREF(target->events);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003436 target->events = events_queue;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003437
3438 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003439 Py_CLEAR(target->start_event_obj);
3440 Py_CLEAR(target->end_event_obj);
3441 Py_CLEAR(target->start_ns_event_obj);
3442 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003443
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003444 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003445 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003446 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003447 Py_RETURN_NONE;
3448 }
3449
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003450 if (!(events_seq = PySequence_Fast(events_to_report,
3451 "events must be a sequence"))) {
3452 return NULL;
3453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003454
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003455 seqlen = PySequence_Size(events_seq);
3456 for (i = 0; i < seqlen; ++i) {
3457 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3458 char *event_name = NULL;
3459 if (PyUnicode_Check(event_name_obj)) {
3460 event_name = _PyUnicode_AsString(event_name_obj);
3461 } else if (PyBytes_Check(event_name_obj)) {
3462 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003463 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003464
3465 if (event_name == NULL) {
3466 Py_DECREF(events_seq);
3467 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3468 return NULL;
3469 } else if (strcmp(event_name, "start") == 0) {
3470 Py_INCREF(event_name_obj);
3471 target->start_event_obj = event_name_obj;
3472 } else if (strcmp(event_name, "end") == 0) {
3473 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003474 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003475 target->end_event_obj = event_name_obj;
3476 } else if (strcmp(event_name, "start-ns") == 0) {
3477 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003478 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003479 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003480 EXPAT(SetNamespaceDeclHandler)(
3481 self->parser,
3482 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3483 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3484 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003485 } else if (strcmp(event_name, "end-ns") == 0) {
3486 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003487 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003488 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003489 EXPAT(SetNamespaceDeclHandler)(
3490 self->parser,
3491 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3492 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3493 );
3494 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003495 Py_DECREF(events_seq);
3496 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003497 return NULL;
3498 }
3499 }
3500
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003501 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003502 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003503}
3504
3505static PyMethodDef xmlparser_methods[] = {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003506 {"feed", (PyCFunction) xmlparser_feed, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003507 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
Eli Benderskya3699232013-05-19 18:47:23 -07003508 {"_parse_whole", (PyCFunction) xmlparser_parse_whole, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003509 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003510 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003511 {NULL, NULL}
3512};
3513
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003514static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003515xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003516{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003517 if (PyUnicode_Check(nameobj)) {
3518 PyObject* res;
3519 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3520 res = self->entity;
3521 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3522 res = self->target;
3523 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3524 return PyUnicode_FromFormat(
3525 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003526 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003527 }
3528 else
3529 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003530
Alexander Belopolskye239d232010-12-08 23:31:48 +00003531 Py_INCREF(res);
3532 return res;
3533 }
3534 generic:
3535 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003536}
3537
Neal Norwitz227b5332006-03-22 09:28:35 +00003538static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003539 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003540 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003541 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003542 (destructor)xmlparser_dealloc, /* tp_dealloc */
3543 0, /* tp_print */
3544 0, /* tp_getattr */
3545 0, /* tp_setattr */
3546 0, /* tp_reserved */
3547 0, /* tp_repr */
3548 0, /* tp_as_number */
3549 0, /* tp_as_sequence */
3550 0, /* tp_as_mapping */
3551 0, /* tp_hash */
3552 0, /* tp_call */
3553 0, /* tp_str */
3554 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3555 0, /* tp_setattro */
3556 0, /* tp_as_buffer */
3557 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3558 /* tp_flags */
3559 0, /* tp_doc */
3560 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3561 (inquiry)xmlparser_gc_clear, /* tp_clear */
3562 0, /* tp_richcompare */
3563 0, /* tp_weaklistoffset */
3564 0, /* tp_iter */
3565 0, /* tp_iternext */
3566 xmlparser_methods, /* tp_methods */
3567 0, /* tp_members */
3568 0, /* tp_getset */
3569 0, /* tp_base */
3570 0, /* tp_dict */
3571 0, /* tp_descr_get */
3572 0, /* tp_descr_set */
3573 0, /* tp_dictoffset */
3574 (initproc)xmlparser_init, /* tp_init */
3575 PyType_GenericAlloc, /* tp_alloc */
3576 xmlparser_new, /* tp_new */
3577 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003578};
3579
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003580/* ==================================================================== */
3581/* python module interface */
3582
3583static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003584 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003585 {NULL, NULL}
3586};
3587
Martin v. Löwis1a214512008-06-11 05:26:20 +00003588
3589static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003590 PyModuleDef_HEAD_INIT,
3591 "_elementtree",
3592 NULL,
3593 -1,
3594 _functions,
3595 NULL,
3596 NULL,
3597 NULL,
3598 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003599};
3600
Neal Norwitzf6657e62006-12-28 04:47:50 +00003601PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003602PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003603{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003604 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003605
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003606 /* Initialize object types */
3607 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003608 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003609 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003610 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003611 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003612 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003613
Martin v. Löwis1a214512008-06-11 05:26:20 +00003614 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003615 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003616 return NULL;
3617
Eli Bendersky828efde2012-04-05 05:40:58 +03003618 if (!(temp = PyImport_ImportModule("copy")))
3619 return NULL;
3620 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3621 Py_XDECREF(temp);
3622
3623 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3624 return NULL;
3625
Eli Bendersky20d41742012-06-01 09:48:37 +03003626 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003627 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3628 if (expat_capi) {
3629 /* check that it's usable */
3630 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3631 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3632 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3633 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003634 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003635 PyErr_SetString(PyExc_ImportError,
3636 "pyexpat version is incompatible");
3637 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003638 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003639 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003640 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003641 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003642
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003643 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003644 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003645 );
3646 Py_INCREF(elementtree_parseerror_obj);
3647 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3648
Eli Bendersky092af1f2012-03-04 07:14:03 +02003649 Py_INCREF((PyObject *)&Element_Type);
3650 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3651
Eli Bendersky58d548d2012-05-29 15:45:16 +03003652 Py_INCREF((PyObject *)&TreeBuilder_Type);
3653 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3654
Eli Bendersky52467b12012-06-01 07:13:08 +03003655 Py_INCREF((PyObject *)&XMLParser_Type);
3656 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003657
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003658 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003659}