blob: c87b6a2b30f229ded8929dd364c80b49cf8888c3 [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
14#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030015#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000017/* -------------------------------------------------------------------- */
18/* configuration */
19
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000020/* An element can hold this many children without extra memory
21 allocations. */
22#define STATIC_CHILDREN 4
23
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
28
29/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010030 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000031 that the number of children should be an even number, at least on
32 32-bit platforms. */
33
34/* -------------------------------------------------------------------- */
35
36#if 0
37static int memory = 0;
38#define ALLOC(size, comment)\
39do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
40#define RELEASE(size, comment)\
41do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
42#else
43#define ALLOC(size, comment)
44#define RELEASE(size, comment)
45#endif
46
47/* compiler tweaks */
48#if defined(_MSC_VER)
49#define LOCAL(type) static __inline type __fastcall
50#else
51#define LOCAL(type) static type
52#endif
53
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054/* macros used to store 'join' flags in string object pointers. note
55 that all use of text and tail as object pointers must be wrapped in
56 JOIN_OBJ. see comments in the ElementObject definition for more
57 info. */
58#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
59#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Antoine Pitrouca8aa4a2012-09-20 20:56:47 +020060#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061
62/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000063static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000064static PyObject* elementtree_deepcopy_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000065static PyObject* elementpath_obj;
66
67/* helpers */
68
69LOCAL(PyObject*)
70deepcopy(PyObject* object, PyObject* memo)
71{
72 /* do a deep copy of the given object */
73
74 PyObject* args;
75 PyObject* result;
76
77 if (!elementtree_deepcopy_obj) {
78 PyErr_SetString(
79 PyExc_RuntimeError,
80 "deepcopy helper not found"
81 );
82 return NULL;
83 }
84
Antoine Pitrouc1948842012-10-01 23:40:37 +020085 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000086 if (!args)
87 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000088 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000089 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000090 return result;
91}
92
93LOCAL(PyObject*)
94list_join(PyObject* list)
95{
96 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000097 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000098 PyObject* result;
99
Antoine Pitrouc1948842012-10-01 23:40:37 +0200100 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000101 if (!joiner)
102 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200103 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000104 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200105 if (result)
106 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000107 return result;
108}
109
Eli Bendersky48d358b2012-05-30 17:57:50 +0300110/* Is the given object an empty dictionary?
111*/
112static int
113is_empty_dict(PyObject *obj)
114{
115 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
116}
117
118
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000119/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200120/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000121
122typedef struct {
123
124 /* attributes (a dictionary object), or None if no attributes */
125 PyObject* attrib;
126
127 /* child elements */
128 int length; /* actual number of items */
129 int allocated; /* allocated items */
130
131 /* this either points to _children or to a malloced buffer */
132 PyObject* *children;
133
134 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100135
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136} ElementObjectExtra;
137
138typedef struct {
139 PyObject_HEAD
140
141 /* element tag (a string). */
142 PyObject* tag;
143
144 /* text before first child. note that this is a tagged pointer;
145 use JOIN_OBJ to get the object pointer. the join flag is used
146 to distinguish lists created by the tree builder from lists
147 assigned to the attribute by application code; the former
148 should be joined before being returned to the user, the latter
149 should be left intact. */
150 PyObject* text;
151
152 /* text after this element, in parent. note that this is a tagged
153 pointer; use JOIN_OBJ to get the object pointer. */
154 PyObject* tail;
155
156 ElementObjectExtra* extra;
157
Eli Benderskyebf37a22012-04-03 22:02:37 +0300158 PyObject *weakreflist; /* For tp_weaklistoffset */
159
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160} ElementObject;
161
Neal Norwitz227b5332006-03-22 09:28:35 +0000162static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000163
Christian Heimes90aa7642007-12-19 02:45:37 +0000164#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000165
166/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200167/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168
169LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200170create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000171{
172 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
173 if (!self->extra)
174 return -1;
175
176 if (!attrib)
177 attrib = Py_None;
178
179 Py_INCREF(attrib);
180 self->extra->attrib = attrib;
181
182 self->extra->length = 0;
183 self->extra->allocated = STATIC_CHILDREN;
184 self->extra->children = self->extra->_children;
185
186 return 0;
187}
188
189LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200190dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000191{
Eli Bendersky08b85292012-04-04 15:55:07 +0300192 ElementObjectExtra *myextra;
193 int i;
194
Eli Benderskyebf37a22012-04-03 22:02:37 +0300195 if (!self->extra)
196 return;
197
198 /* Avoid DECREFs calling into this code again (cycles, etc.)
199 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300200 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300201 self->extra = NULL;
202
203 Py_DECREF(myextra->attrib);
204
Eli Benderskyebf37a22012-04-03 22:02:37 +0300205 for (i = 0; i < myextra->length; i++)
206 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000207
Eli Benderskyebf37a22012-04-03 22:02:37 +0300208 if (myextra->children != myextra->_children)
209 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210
Eli Benderskyebf37a22012-04-03 22:02:37 +0300211 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212}
213
Eli Bendersky092af1f2012-03-04 07:14:03 +0200214/* Convenience internal function to create new Element objects with the given
215 * tag and attributes.
216*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000217LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200218create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000219{
220 ElementObject* self;
221
Eli Bendersky0192ba32012-03-30 16:38:33 +0300222 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000223 if (self == NULL)
224 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000225 self->extra = NULL;
226
Eli Bendersky48d358b2012-05-30 17:57:50 +0300227 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200228 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000229 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000231 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232 }
233
234 Py_INCREF(tag);
235 self->tag = tag;
236
237 Py_INCREF(Py_None);
238 self->text = Py_None;
239
240 Py_INCREF(Py_None);
241 self->tail = Py_None;
242
Eli Benderskyebf37a22012-04-03 22:02:37 +0300243 self->weakreflist = NULL;
244
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000245 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300246 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000247 return (PyObject*) self;
248}
249
Eli Bendersky092af1f2012-03-04 07:14:03 +0200250static PyObject *
251element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
252{
253 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
254 if (e != NULL) {
255 Py_INCREF(Py_None);
256 e->tag = Py_None;
257
258 Py_INCREF(Py_None);
259 e->text = Py_None;
260
261 Py_INCREF(Py_None);
262 e->tail = Py_None;
263
264 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200266 }
267 return (PyObject *)e;
268}
269
Eli Bendersky737b1732012-05-29 06:02:56 +0300270/* Helper function for extracting the attrib dictionary from a keywords dict.
271 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800272 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300273 * directly into *kwds.
274 * If there is no 'attrib' keyword, return an empty dict.
275 */
276static PyObject*
277get_attrib_from_keywords(PyObject *kwds)
278{
279 PyObject *attrib_str = PyUnicode_FromString("attrib");
280 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
281
282 if (attrib) {
283 /* If attrib was found in kwds, copy its value and remove it from
284 * kwds
285 */
286 if (!PyDict_Check(attrib)) {
287 Py_DECREF(attrib_str);
288 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
289 Py_TYPE(attrib)->tp_name);
290 return NULL;
291 }
292 attrib = PyDict_Copy(attrib);
293 PyDict_DelItem(kwds, attrib_str);
294 } else {
295 attrib = PyDict_New();
296 }
297
298 Py_DECREF(attrib_str);
299
300 if (attrib)
301 PyDict_Update(attrib, kwds);
302 return attrib;
303}
304
Eli Bendersky092af1f2012-03-04 07:14:03 +0200305static int
306element_init(PyObject *self, PyObject *args, PyObject *kwds)
307{
308 PyObject *tag;
309 PyObject *tmp;
310 PyObject *attrib = NULL;
311 ElementObject *self_elem;
312
313 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
314 return -1;
315
Eli Bendersky737b1732012-05-29 06:02:56 +0300316 if (attrib) {
317 /* attrib passed as positional arg */
318 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200319 if (!attrib)
320 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300321 if (kwds) {
322 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200323 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300324 return -1;
325 }
326 }
327 } else if (kwds) {
328 /* have keywords args */
329 attrib = get_attrib_from_keywords(kwds);
330 if (!attrib)
331 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200332 }
333
334 self_elem = (ElementObject *)self;
335
Antoine Pitrouc1948842012-10-01 23:40:37 +0200336 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200337 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200338 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200339 return -1;
340 }
341 }
342
Eli Bendersky48d358b2012-05-30 17:57:50 +0300343 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200344 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200345
346 /* Replace the objects already pointed to by tag, text and tail. */
347 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200348 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200349 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200350 Py_DECREF(tmp);
351
352 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200353 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200354 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200355 Py_DECREF(JOIN_OBJ(tmp));
356
357 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200358 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200359 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200360 Py_DECREF(JOIN_OBJ(tmp));
361
362 return 0;
363}
364
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000365LOCAL(int)
366element_resize(ElementObject* self, int extra)
367{
368 int size;
369 PyObject* *children;
370
371 /* make sure self->children can hold the given number of extra
372 elements. set an exception and return -1 if allocation failed */
373
374 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200375 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000376
377 size = self->extra->length + extra;
378
379 if (size > self->extra->allocated) {
380 /* use Python 2.4's list growth strategy */
381 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000382 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100383 * which needs at least 4 bytes.
384 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000385 * be safe.
386 */
387 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000388 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000389 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100390 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000391 * false alarm always assume at least one child to be safe.
392 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000393 children = PyObject_Realloc(self->extra->children,
394 size * sizeof(PyObject*));
395 if (!children)
396 goto nomemory;
397 } else {
398 children = PyObject_Malloc(size * sizeof(PyObject*));
399 if (!children)
400 goto nomemory;
401 /* copy existing children from static area to malloc buffer */
402 memcpy(children, self->extra->children,
403 self->extra->length * sizeof(PyObject*));
404 }
405 self->extra->children = children;
406 self->extra->allocated = size;
407 }
408
409 return 0;
410
411 nomemory:
412 PyErr_NoMemory();
413 return -1;
414}
415
416LOCAL(int)
417element_add_subelement(ElementObject* self, PyObject* element)
418{
419 /* add a child element to a parent */
420
421 if (element_resize(self, 1) < 0)
422 return -1;
423
424 Py_INCREF(element);
425 self->extra->children[self->extra->length] = element;
426
427 self->extra->length++;
428
429 return 0;
430}
431
432LOCAL(PyObject*)
433element_get_attrib(ElementObject* self)
434{
435 /* return borrowed reference to attrib dictionary */
436 /* note: this function assumes that the extra section exists */
437
438 PyObject* res = self->extra->attrib;
439
440 if (res == Py_None) {
441 /* create missing dictionary */
442 res = PyDict_New();
443 if (!res)
444 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200445 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000446 self->extra->attrib = res;
447 }
448
449 return res;
450}
451
452LOCAL(PyObject*)
453element_get_text(ElementObject* self)
454{
455 /* return borrowed reference to text attribute */
456
457 PyObject* res = self->text;
458
459 if (JOIN_GET(res)) {
460 res = JOIN_OBJ(res);
461 if (PyList_CheckExact(res)) {
462 res = list_join(res);
463 if (!res)
464 return NULL;
465 self->text = res;
466 }
467 }
468
469 return res;
470}
471
472LOCAL(PyObject*)
473element_get_tail(ElementObject* self)
474{
475 /* return borrowed reference to text attribute */
476
477 PyObject* res = self->tail;
478
479 if (JOIN_GET(res)) {
480 res = JOIN_OBJ(res);
481 if (PyList_CheckExact(res)) {
482 res = list_join(res);
483 if (!res)
484 return NULL;
485 self->tail = res;
486 }
487 }
488
489 return res;
490}
491
492static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300493subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000494{
495 PyObject* elem;
496
497 ElementObject* parent;
498 PyObject* tag;
499 PyObject* attrib = NULL;
500 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
501 &Element_Type, &parent, &tag,
502 &PyDict_Type, &attrib))
503 return NULL;
504
Eli Bendersky737b1732012-05-29 06:02:56 +0300505 if (attrib) {
506 /* attrib passed as positional arg */
507 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000508 if (!attrib)
509 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300510 if (kwds) {
511 if (PyDict_Update(attrib, kwds) < 0) {
512 return NULL;
513 }
514 }
515 } else if (kwds) {
516 /* have keyword args */
517 attrib = get_attrib_from_keywords(kwds);
518 if (!attrib)
519 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000520 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300521 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000522 Py_INCREF(Py_None);
523 attrib = Py_None;
524 }
525
Eli Bendersky092af1f2012-03-04 07:14:03 +0200526 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000527
528 Py_DECREF(attrib);
529
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000530 if (element_add_subelement(parent, elem) < 0) {
531 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000532 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000533 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000534
535 return elem;
536}
537
Eli Bendersky0192ba32012-03-30 16:38:33 +0300538static int
539element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
540{
541 Py_VISIT(self->tag);
542 Py_VISIT(JOIN_OBJ(self->text));
543 Py_VISIT(JOIN_OBJ(self->tail));
544
545 if (self->extra) {
546 int i;
547 Py_VISIT(self->extra->attrib);
548
549 for (i = 0; i < self->extra->length; ++i)
550 Py_VISIT(self->extra->children[i]);
551 }
552 return 0;
553}
554
555static int
556element_gc_clear(ElementObject *self)
557{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300558 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300559
560 /* The following is like Py_CLEAR for self->text and self->tail, but
561 * written explicitily because the real pointers hide behind access
562 * macros.
563 */
564 if (self->text) {
565 PyObject *tmp = JOIN_OBJ(self->text);
566 self->text = NULL;
567 Py_DECREF(tmp);
568 }
569
570 if (self->tail) {
571 PyObject *tmp = JOIN_OBJ(self->tail);
572 self->tail = NULL;
573 Py_DECREF(tmp);
574 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300575
576 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300577 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300578 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300579 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300580 return 0;
581}
582
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583static void
584element_dealloc(ElementObject* self)
585{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300586 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300587
588 if (self->weakreflist != NULL)
589 PyObject_ClearWeakRefs((PyObject *) self);
590
Eli Bendersky0192ba32012-03-30 16:38:33 +0300591 /* element_gc_clear clears all references and deallocates extra
592 */
593 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000594
595 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200596 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000597}
598
599/* -------------------------------------------------------------------- */
600/* methods (in alphabetical order) */
601
602static PyObject*
603element_append(ElementObject* self, PyObject* args)
604{
605 PyObject* element;
606 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
607 return NULL;
608
609 if (element_add_subelement(self, element) < 0)
610 return NULL;
611
612 Py_RETURN_NONE;
613}
614
615static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000617{
618 if (!PyArg_ParseTuple(args, ":clear"))
619 return NULL;
620
Eli Benderskyebf37a22012-04-03 22:02:37 +0300621 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000622
623 Py_INCREF(Py_None);
624 Py_DECREF(JOIN_OBJ(self->text));
625 self->text = Py_None;
626
627 Py_INCREF(Py_None);
628 Py_DECREF(JOIN_OBJ(self->tail));
629 self->tail = Py_None;
630
631 Py_RETURN_NONE;
632}
633
634static PyObject*
635element_copy(ElementObject* self, PyObject* args)
636{
637 int i;
638 ElementObject* element;
639
640 if (!PyArg_ParseTuple(args, ":__copy__"))
641 return NULL;
642
Eli Bendersky092af1f2012-03-04 07:14:03 +0200643 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000644 self->tag, (self->extra) ? self->extra->attrib : Py_None
645 );
646 if (!element)
647 return NULL;
648
649 Py_DECREF(JOIN_OBJ(element->text));
650 element->text = self->text;
651 Py_INCREF(JOIN_OBJ(element->text));
652
653 Py_DECREF(JOIN_OBJ(element->tail));
654 element->tail = self->tail;
655 Py_INCREF(JOIN_OBJ(element->tail));
656
657 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100658
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000659 if (element_resize(element, self->extra->length) < 0) {
660 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000661 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000662 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663
664 for (i = 0; i < self->extra->length; i++) {
665 Py_INCREF(self->extra->children[i]);
666 element->extra->children[i] = self->extra->children[i];
667 }
668
669 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100670
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000671 }
672
673 return (PyObject*) element;
674}
675
676static PyObject*
677element_deepcopy(ElementObject* self, PyObject* args)
678{
679 int i;
680 ElementObject* element;
681 PyObject* tag;
682 PyObject* attrib;
683 PyObject* text;
684 PyObject* tail;
685 PyObject* id;
686
687 PyObject* memo;
688 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
689 return NULL;
690
691 tag = deepcopy(self->tag, memo);
692 if (!tag)
693 return NULL;
694
695 if (self->extra) {
696 attrib = deepcopy(self->extra->attrib, memo);
697 if (!attrib) {
698 Py_DECREF(tag);
699 return NULL;
700 }
701 } else {
702 Py_INCREF(Py_None);
703 attrib = Py_None;
704 }
705
Eli Bendersky092af1f2012-03-04 07:14:03 +0200706 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000707
708 Py_DECREF(tag);
709 Py_DECREF(attrib);
710
711 if (!element)
712 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100713
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000714 text = deepcopy(JOIN_OBJ(self->text), memo);
715 if (!text)
716 goto error;
717 Py_DECREF(element->text);
718 element->text = JOIN_SET(text, JOIN_GET(self->text));
719
720 tail = deepcopy(JOIN_OBJ(self->tail), memo);
721 if (!tail)
722 goto error;
723 Py_DECREF(element->tail);
724 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
725
726 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100727
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000728 if (element_resize(element, self->extra->length) < 0)
729 goto error;
730
731 for (i = 0; i < self->extra->length; i++) {
732 PyObject* child = deepcopy(self->extra->children[i], memo);
733 if (!child) {
734 element->extra->length = i;
735 goto error;
736 }
737 element->extra->children[i] = child;
738 }
739
740 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100741
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000742 }
743
744 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200745 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000746 if (!id)
747 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
749 i = PyDict_SetItem(memo, id, (PyObject*) element);
750
751 Py_DECREF(id);
752
753 if (i < 0)
754 goto error;
755
756 return (PyObject*) element;
757
758 error:
759 Py_DECREF(element);
760 return NULL;
761}
762
Martin v. Löwisbce16662012-06-17 10:41:22 +0200763static PyObject*
764element_sizeof(PyObject* _self, PyObject* args)
765{
766 ElementObject *self = (ElementObject*)_self;
767 Py_ssize_t result = sizeof(ElementObject);
768 if (self->extra) {
769 result += sizeof(ElementObjectExtra);
770 if (self->extra->children != self->extra->_children)
771 result += sizeof(PyObject*) * self->extra->allocated;
772 }
773 return PyLong_FromSsize_t(result);
774}
775
Eli Bendersky698bdb22013-01-10 06:01:06 -0800776/* dict keys for getstate/setstate. */
777#define PICKLED_TAG "tag"
778#define PICKLED_CHILDREN "_children"
779#define PICKLED_ATTRIB "attrib"
780#define PICKLED_TAIL "tail"
781#define PICKLED_TEXT "text"
782
783/* __getstate__ returns a fabricated instance dict as in the pure-Python
784 * Element implementation, for interoperability/interchangeability. This
785 * makes the pure-Python implementation details an API, but (a) there aren't
786 * any unnecessary structures there; and (b) it buys compatibility with 3.2
787 * pickles. See issue #16076.
788 */
789static PyObject *
790element_getstate(ElementObject *self)
791{
792 int i, noattrib;
793 PyObject *instancedict = NULL, *children;
794
795 /* Build a list of children. */
796 children = PyList_New(self->extra ? self->extra->length : 0);
797 if (!children)
798 return NULL;
799 for (i = 0; i < PyList_GET_SIZE(children); i++) {
800 PyObject *child = self->extra->children[i];
801 Py_INCREF(child);
802 PyList_SET_ITEM(children, i, child);
803 }
804
805 /* Construct the state object. */
806 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
807 if (noattrib)
808 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
809 PICKLED_TAG, self->tag,
810 PICKLED_CHILDREN, children,
811 PICKLED_ATTRIB,
812 PICKLED_TEXT, self->text,
813 PICKLED_TAIL, self->tail);
814 else
815 instancedict = Py_BuildValue("{sOsOsOsOsO}",
816 PICKLED_TAG, self->tag,
817 PICKLED_CHILDREN, children,
818 PICKLED_ATTRIB, self->extra->attrib,
819 PICKLED_TEXT, self->text,
820 PICKLED_TAIL, self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800821 if (instancedict) {
822 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800823 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800824 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800825 else {
826 for (i = 0; i < PyList_GET_SIZE(children); i++)
827 Py_DECREF(PyList_GET_ITEM(children, i));
828 Py_DECREF(children);
829
830 return NULL;
831 }
832}
833
834static PyObject *
835element_setstate_from_attributes(ElementObject *self,
836 PyObject *tag,
837 PyObject *attrib,
838 PyObject *text,
839 PyObject *tail,
840 PyObject *children)
841{
842 Py_ssize_t i, nchildren;
843
844 if (!tag) {
845 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
846 return NULL;
847 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800848
849 Py_CLEAR(self->tag);
850 self->tag = tag;
851 Py_INCREF(self->tag);
852
853 Py_CLEAR(self->text);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800854 self->text = text ? text : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800855 Py_INCREF(self->text);
856
857 Py_CLEAR(self->tail);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800858 self->tail = tail ? tail : Py_None;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800859 Py_INCREF(self->tail);
860
861 /* Handle ATTRIB and CHILDREN. */
862 if (!children && !attrib)
863 Py_RETURN_NONE;
864
865 /* Compute 'nchildren'. */
866 if (children) {
867 if (!PyList_Check(children)) {
868 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
869 return NULL;
870 }
871 nchildren = PyList_Size(children);
872 }
873 else {
874 nchildren = 0;
875 }
876
877 /* Allocate 'extra'. */
878 if (element_resize(self, nchildren)) {
879 return NULL;
880 }
881 assert(self->extra && self->extra->allocated >= nchildren);
882
883 /* Copy children */
884 for (i = 0; i < nchildren; i++) {
885 self->extra->children[i] = PyList_GET_ITEM(children, i);
886 Py_INCREF(self->extra->children[i]);
887 }
888
889 self->extra->length = nchildren;
890 self->extra->allocated = nchildren;
891
892 /* Stash attrib. */
893 if (attrib) {
894 Py_CLEAR(self->extra->attrib);
895 self->extra->attrib = attrib;
896 Py_INCREF(attrib);
897 }
898
899 Py_RETURN_NONE;
900}
901
902/* __setstate__ for Element instance from the Python implementation.
903 * 'state' should be the instance dict.
904 */
905static PyObject *
906element_setstate_from_Python(ElementObject *self, PyObject *state)
907{
908 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
909 PICKLED_TAIL, PICKLED_CHILDREN, 0};
910 PyObject *args;
911 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800912 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800913
Eli Bendersky698bdb22013-01-10 06:01:06 -0800914 tag = attrib = text = tail = children = NULL;
915 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800916 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800917 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800918
919 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
920 &attrib, &text, &tail, &children))
921 retval = element_setstate_from_attributes(self, tag, attrib, text,
922 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800923 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800924 retval = NULL;
925
926 Py_DECREF(args);
927 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800928}
929
930static PyObject *
931element_setstate(ElementObject *self, PyObject *state)
932{
933 if (!PyDict_CheckExact(state)) {
934 PyErr_Format(PyExc_TypeError,
935 "Don't know how to unpickle \"%.200R\" as an Element",
936 state);
937 return NULL;
938 }
939 else
940 return element_setstate_from_Python(self, state);
941}
942
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000943LOCAL(int)
944checkpath(PyObject* tag)
945{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000946 Py_ssize_t i;
947 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000948
949 /* check if a tag contains an xpath character */
950
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000951#define PATHCHAR(ch) \
952 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000953
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000954 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200955 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
956 void *data = PyUnicode_DATA(tag);
957 unsigned int kind = PyUnicode_KIND(tag);
958 for (i = 0; i < len; i++) {
959 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
960 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000961 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200962 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000963 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200964 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000965 return 1;
966 }
967 return 0;
968 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000969 if (PyBytes_Check(tag)) {
970 char *p = PyBytes_AS_STRING(tag);
971 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000972 if (p[i] == '{')
973 check = 0;
974 else if (p[i] == '}')
975 check = 1;
976 else if (check && PATHCHAR(p[i]))
977 return 1;
978 }
979 return 0;
980 }
981
982 return 1; /* unknown type; might be path expression */
983}
984
985static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000986element_extend(ElementObject* self, PyObject* args)
987{
988 PyObject* seq;
989 Py_ssize_t i, seqlen = 0;
990
991 PyObject* seq_in;
992 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
993 return NULL;
994
995 seq = PySequence_Fast(seq_in, "");
996 if (!seq) {
997 PyErr_Format(
998 PyExc_TypeError,
999 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1000 );
1001 return NULL;
1002 }
1003
1004 seqlen = PySequence_Size(seq);
1005 for (i = 0; i < seqlen; i++) {
1006 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001007 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1008 Py_DECREF(seq);
1009 PyErr_Format(
1010 PyExc_TypeError,
1011 "expected an Element, not \"%.200s\"",
1012 Py_TYPE(element)->tp_name);
1013 return NULL;
1014 }
1015
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001016 if (element_add_subelement(self, element) < 0) {
1017 Py_DECREF(seq);
1018 return NULL;
1019 }
1020 }
1021
1022 Py_DECREF(seq);
1023
1024 Py_RETURN_NONE;
1025}
1026
1027static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001028element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001029{
1030 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001031 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001032 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001033 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001034
Eli Bendersky737b1732012-05-29 06:02:56 +03001035 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1036 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001037 return NULL;
1038
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001039 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001040 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001041 return _PyObject_CallMethodId(
1042 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001043 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001044 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001045
1046 if (!self->extra)
1047 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001048
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001049 for (i = 0; i < self->extra->length; i++) {
1050 PyObject* item = self->extra->children[i];
1051 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001052 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001053 Py_INCREF(item);
1054 return item;
1055 }
1056 }
1057
1058 Py_RETURN_NONE;
1059}
1060
1061static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001062element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001063{
1064 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001065 PyObject* tag;
1066 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001067 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001068 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001069 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001070
Eli Bendersky737b1732012-05-29 06:02:56 +03001071 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1072 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001073 return NULL;
1074
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001075 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001076 return _PyObject_CallMethodId(
1077 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001078 );
1079
1080 if (!self->extra) {
1081 Py_INCREF(default_value);
1082 return default_value;
1083 }
1084
1085 for (i = 0; i < self->extra->length; i++) {
1086 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +00001087 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
1088
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001089 PyObject* text = element_get_text(item);
1090 if (text == Py_None)
Eli Bendersky25771b32013-01-13 05:26:07 -08001091 return PyUnicode_New(0, 0);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001092 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001093 return text;
1094 }
1095 }
1096
1097 Py_INCREF(default_value);
1098 return default_value;
1099}
1100
1101static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001102element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001103{
1104 int i;
1105 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001106 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001107 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001108 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001109
Eli Bendersky737b1732012-05-29 06:02:56 +03001110 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1111 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001112 return NULL;
1113
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001114 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001115 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001116 return _PyObject_CallMethodId(
1117 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001118 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001119 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001120
1121 out = PyList_New(0);
1122 if (!out)
1123 return NULL;
1124
1125 if (!self->extra)
1126 return out;
1127
1128 for (i = 0; i < self->extra->length; i++) {
1129 PyObject* item = self->extra->children[i];
1130 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001131 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001132 if (PyList_Append(out, item) < 0) {
1133 Py_DECREF(out);
1134 return NULL;
1135 }
1136 }
1137 }
1138
1139 return out;
1140}
1141
1142static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001143element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001144{
1145 PyObject* tag;
1146 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001147 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001148 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001149
Eli Bendersky737b1732012-05-29 06:02:56 +03001150 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1151 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001152 return NULL;
1153
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001154 return _PyObject_CallMethodId(
1155 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001156 );
1157}
1158
1159static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001160element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001161{
1162 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001163 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001164
1165 PyObject* key;
1166 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001167
1168 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1169 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001170 return NULL;
1171
1172 if (!self->extra || self->extra->attrib == Py_None)
1173 value = default_value;
1174 else {
1175 value = PyDict_GetItem(self->extra->attrib, key);
1176 if (!value)
1177 value = default_value;
1178 }
1179
1180 Py_INCREF(value);
1181 return value;
1182}
1183
1184static PyObject*
1185element_getchildren(ElementObject* self, PyObject* args)
1186{
1187 int i;
1188 PyObject* list;
1189
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001190 /* FIXME: report as deprecated? */
1191
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001192 if (!PyArg_ParseTuple(args, ":getchildren"))
1193 return NULL;
1194
1195 if (!self->extra)
1196 return PyList_New(0);
1197
1198 list = PyList_New(self->extra->length);
1199 if (!list)
1200 return NULL;
1201
1202 for (i = 0; i < self->extra->length; i++) {
1203 PyObject* item = self->extra->children[i];
1204 Py_INCREF(item);
1205 PyList_SET_ITEM(list, i, item);
1206 }
1207
1208 return list;
1209}
1210
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001211
Eli Bendersky64d11e62012-06-15 07:42:50 +03001212static PyObject *
1213create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1214
1215
1216static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001217element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001218{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001220 static char* kwlist[] = {"tag", 0};
1221
1222 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001223 return NULL;
1224
Eli Bendersky64d11e62012-06-15 07:42:50 +03001225 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001226}
1227
1228
1229static PyObject*
1230element_itertext(ElementObject* self, PyObject* args)
1231{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001232 if (!PyArg_ParseTuple(args, ":itertext"))
1233 return NULL;
1234
Eli Bendersky64d11e62012-06-15 07:42:50 +03001235 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001236}
1237
Eli Bendersky64d11e62012-06-15 07:42:50 +03001238
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001239static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001240element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001241{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001242 ElementObject* self = (ElementObject*) self_;
1243
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244 if (!self->extra || index < 0 || index >= self->extra->length) {
1245 PyErr_SetString(
1246 PyExc_IndexError,
1247 "child index out of range"
1248 );
1249 return NULL;
1250 }
1251
1252 Py_INCREF(self->extra->children[index]);
1253 return self->extra->children[index];
1254}
1255
1256static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001257element_insert(ElementObject* self, PyObject* args)
1258{
1259 int i;
1260
1261 int index;
1262 PyObject* element;
1263 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1264 &Element_Type, &element))
1265 return NULL;
1266
1267 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001268 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001269
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001270 if (index < 0) {
1271 index += self->extra->length;
1272 if (index < 0)
1273 index = 0;
1274 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001275 if (index > self->extra->length)
1276 index = self->extra->length;
1277
1278 if (element_resize(self, 1) < 0)
1279 return NULL;
1280
1281 for (i = self->extra->length; i > index; i--)
1282 self->extra->children[i] = self->extra->children[i-1];
1283
1284 Py_INCREF(element);
1285 self->extra->children[index] = element;
1286
1287 self->extra->length++;
1288
1289 Py_RETURN_NONE;
1290}
1291
1292static PyObject*
1293element_items(ElementObject* self, PyObject* args)
1294{
1295 if (!PyArg_ParseTuple(args, ":items"))
1296 return NULL;
1297
1298 if (!self->extra || self->extra->attrib == Py_None)
1299 return PyList_New(0);
1300
1301 return PyDict_Items(self->extra->attrib);
1302}
1303
1304static PyObject*
1305element_keys(ElementObject* self, PyObject* args)
1306{
1307 if (!PyArg_ParseTuple(args, ":keys"))
1308 return NULL;
1309
1310 if (!self->extra || self->extra->attrib == Py_None)
1311 return PyList_New(0);
1312
1313 return PyDict_Keys(self->extra->attrib);
1314}
1315
Martin v. Löwis18e16552006-02-15 17:27:45 +00001316static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001317element_length(ElementObject* self)
1318{
1319 if (!self->extra)
1320 return 0;
1321
1322 return self->extra->length;
1323}
1324
1325static PyObject*
1326element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1327{
1328 PyObject* elem;
1329
1330 PyObject* tag;
1331 PyObject* attrib;
1332 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1333 return NULL;
1334
1335 attrib = PyDict_Copy(attrib);
1336 if (!attrib)
1337 return NULL;
1338
Eli Bendersky092af1f2012-03-04 07:14:03 +02001339 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001340
1341 Py_DECREF(attrib);
1342
1343 return elem;
1344}
1345
1346static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001347element_remove(ElementObject* self, PyObject* args)
1348{
1349 int i;
1350
1351 PyObject* element;
1352 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1353 return NULL;
1354
1355 if (!self->extra) {
1356 /* element has no children, so raise exception */
1357 PyErr_SetString(
1358 PyExc_ValueError,
1359 "list.remove(x): x not in list"
1360 );
1361 return NULL;
1362 }
1363
1364 for (i = 0; i < self->extra->length; i++) {
1365 if (self->extra->children[i] == element)
1366 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001367 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001368 break;
1369 }
1370
1371 if (i == self->extra->length) {
1372 /* element is not in children, so raise exception */
1373 PyErr_SetString(
1374 PyExc_ValueError,
1375 "list.remove(x): x not in list"
1376 );
1377 return NULL;
1378 }
1379
1380 Py_DECREF(self->extra->children[i]);
1381
1382 self->extra->length--;
1383
1384 for (; i < self->extra->length; i++)
1385 self->extra->children[i] = self->extra->children[i+1];
1386
1387 Py_RETURN_NONE;
1388}
1389
1390static PyObject*
1391element_repr(ElementObject* self)
1392{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001393 if (self->tag)
1394 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1395 else
1396 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001397}
1398
1399static PyObject*
1400element_set(ElementObject* self, PyObject* args)
1401{
1402 PyObject* attrib;
1403
1404 PyObject* key;
1405 PyObject* value;
1406 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1407 return NULL;
1408
1409 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001410 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001411
1412 attrib = element_get_attrib(self);
1413 if (!attrib)
1414 return NULL;
1415
1416 if (PyDict_SetItem(attrib, key, value) < 0)
1417 return NULL;
1418
1419 Py_RETURN_NONE;
1420}
1421
1422static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001423element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001424{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001425 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001426 int i;
1427 PyObject* old;
1428
1429 if (!self->extra || index < 0 || index >= self->extra->length) {
1430 PyErr_SetString(
1431 PyExc_IndexError,
1432 "child assignment index out of range");
1433 return -1;
1434 }
1435
1436 old = self->extra->children[index];
1437
1438 if (item) {
1439 Py_INCREF(item);
1440 self->extra->children[index] = item;
1441 } else {
1442 self->extra->length--;
1443 for (i = index; i < self->extra->length; i++)
1444 self->extra->children[i] = self->extra->children[i+1];
1445 }
1446
1447 Py_DECREF(old);
1448
1449 return 0;
1450}
1451
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001452static PyObject*
1453element_subscr(PyObject* self_, PyObject* item)
1454{
1455 ElementObject* self = (ElementObject*) self_;
1456
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001457 if (PyIndex_Check(item)) {
1458 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001459
1460 if (i == -1 && PyErr_Occurred()) {
1461 return NULL;
1462 }
1463 if (i < 0 && self->extra)
1464 i += self->extra->length;
1465 return element_getitem(self_, i);
1466 }
1467 else if (PySlice_Check(item)) {
1468 Py_ssize_t start, stop, step, slicelen, cur, i;
1469 PyObject* list;
1470
1471 if (!self->extra)
1472 return PyList_New(0);
1473
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001474 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001475 self->extra->length,
1476 &start, &stop, &step, &slicelen) < 0) {
1477 return NULL;
1478 }
1479
1480 if (slicelen <= 0)
1481 return PyList_New(0);
1482 else {
1483 list = PyList_New(slicelen);
1484 if (!list)
1485 return NULL;
1486
1487 for (cur = start, i = 0; i < slicelen;
1488 cur += step, i++) {
1489 PyObject* item = self->extra->children[cur];
1490 Py_INCREF(item);
1491 PyList_SET_ITEM(list, i, item);
1492 }
1493
1494 return list;
1495 }
1496 }
1497 else {
1498 PyErr_SetString(PyExc_TypeError,
1499 "element indices must be integers");
1500 return NULL;
1501 }
1502}
1503
1504static int
1505element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1506{
1507 ElementObject* self = (ElementObject*) self_;
1508
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001509 if (PyIndex_Check(item)) {
1510 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001511
1512 if (i == -1 && PyErr_Occurred()) {
1513 return -1;
1514 }
1515 if (i < 0 && self->extra)
1516 i += self->extra->length;
1517 return element_setitem(self_, i, value);
1518 }
1519 else if (PySlice_Check(item)) {
1520 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1521
1522 PyObject* recycle = NULL;
1523 PyObject* seq = NULL;
1524
1525 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001526 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001527
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001528 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001529 self->extra->length,
1530 &start, &stop, &step, &slicelen) < 0) {
1531 return -1;
1532 }
1533
Eli Bendersky865756a2012-03-09 13:38:15 +02001534 if (value == NULL) {
1535 /* Delete slice */
1536 size_t cur;
1537 Py_ssize_t i;
1538
1539 if (slicelen <= 0)
1540 return 0;
1541
1542 /* Since we're deleting, the direction of the range doesn't matter,
1543 * so for simplicity make it always ascending.
1544 */
1545 if (step < 0) {
1546 stop = start + 1;
1547 start = stop + step * (slicelen - 1) - 1;
1548 step = -step;
1549 }
1550
1551 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1552
1553 /* recycle is a list that will contain all the children
1554 * scheduled for removal.
1555 */
1556 if (!(recycle = PyList_New(slicelen))) {
1557 PyErr_NoMemory();
1558 return -1;
1559 }
1560
1561 /* This loop walks over all the children that have to be deleted,
1562 * with cur pointing at them. num_moved is the amount of children
1563 * until the next deleted child that have to be "shifted down" to
1564 * occupy the deleted's places.
1565 * Note that in the ith iteration, shifting is done i+i places down
1566 * because i children were already removed.
1567 */
1568 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1569 /* Compute how many children have to be moved, clipping at the
1570 * list end.
1571 */
1572 Py_ssize_t num_moved = step - 1;
1573 if (cur + step >= (size_t)self->extra->length) {
1574 num_moved = self->extra->length - cur - 1;
1575 }
1576
1577 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1578
1579 memmove(
1580 self->extra->children + cur - i,
1581 self->extra->children + cur + 1,
1582 num_moved * sizeof(PyObject *));
1583 }
1584
1585 /* Leftover "tail" after the last removed child */
1586 cur = start + (size_t)slicelen * step;
1587 if (cur < (size_t)self->extra->length) {
1588 memmove(
1589 self->extra->children + cur - slicelen,
1590 self->extra->children + cur,
1591 (self->extra->length - cur) * sizeof(PyObject *));
1592 }
1593
1594 self->extra->length -= slicelen;
1595
1596 /* Discard the recycle list with all the deleted sub-elements */
1597 Py_XDECREF(recycle);
1598 return 0;
1599 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001600 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001601 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001602 seq = PySequence_Fast(value, "");
1603 if (!seq) {
1604 PyErr_Format(
1605 PyExc_TypeError,
1606 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1607 );
1608 return -1;
1609 }
1610 newlen = PySequence_Size(seq);
1611 }
1612
1613 if (step != 1 && newlen != slicelen)
1614 {
1615 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001616 "attempt to assign sequence of size %zd "
1617 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001618 newlen, slicelen
1619 );
1620 return -1;
1621 }
1622
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001623 /* Resize before creating the recycle bin, to prevent refleaks. */
1624 if (newlen > slicelen) {
1625 if (element_resize(self, newlen - slicelen) < 0) {
1626 if (seq) {
1627 Py_DECREF(seq);
1628 }
1629 return -1;
1630 }
1631 }
1632
1633 if (slicelen > 0) {
1634 /* to avoid recursive calls to this method (via decref), move
1635 old items to the recycle bin here, and get rid of them when
1636 we're done modifying the element */
1637 recycle = PyList_New(slicelen);
1638 if (!recycle) {
1639 if (seq) {
1640 Py_DECREF(seq);
1641 }
1642 return -1;
1643 }
1644 for (cur = start, i = 0; i < slicelen;
1645 cur += step, i++)
1646 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1647 }
1648
1649 if (newlen < slicelen) {
1650 /* delete slice */
1651 for (i = stop; i < self->extra->length; i++)
1652 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1653 } else if (newlen > slicelen) {
1654 /* insert slice */
1655 for (i = self->extra->length-1; i >= stop; i--)
1656 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1657 }
1658
1659 /* replace the slice */
1660 for (cur = start, i = 0; i < newlen;
1661 cur += step, i++) {
1662 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1663 Py_INCREF(element);
1664 self->extra->children[cur] = element;
1665 }
1666
1667 self->extra->length += newlen - slicelen;
1668
1669 if (seq) {
1670 Py_DECREF(seq);
1671 }
1672
1673 /* discard the recycle bin, and everything in it */
1674 Py_XDECREF(recycle);
1675
1676 return 0;
1677 }
1678 else {
1679 PyErr_SetString(PyExc_TypeError,
1680 "element indices must be integers");
1681 return -1;
1682 }
1683}
1684
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001685static PyMethodDef element_methods[] = {
1686
Eli Bendersky0192ba32012-03-30 16:38:33 +03001687 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001688
Eli Benderskya8736902013-01-05 06:26:39 -08001689 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001690 {"set", (PyCFunction) element_set, METH_VARARGS},
1691
Eli Bendersky737b1732012-05-29 06:02:56 +03001692 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1693 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1694 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001695
1696 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001697 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001698 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1699 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1700
Eli Benderskya8736902013-01-05 06:26:39 -08001701 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001702 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001703 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001704
Eli Benderskya8736902013-01-05 06:26:39 -08001705 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001706 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1707
1708 {"items", (PyCFunction) element_items, METH_VARARGS},
1709 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1710
1711 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1712
1713 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1714 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001715 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001716 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1717 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001718
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001719 {NULL, NULL}
1720};
1721
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001722static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001723element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001724{
1725 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001726 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001727
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001728 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001729 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001730
Alexander Belopolskye239d232010-12-08 23:31:48 +00001731 if (name == NULL)
1732 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001733
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001734 /* handle common attributes first */
1735 if (strcmp(name, "tag") == 0) {
1736 res = self->tag;
1737 Py_INCREF(res);
1738 return res;
1739 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001740 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001741 Py_INCREF(res);
1742 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001743 }
1744
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001745 /* methods */
1746 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1747 if (res)
1748 return res;
1749
1750 /* less common attributes */
1751 if (strcmp(name, "tail") == 0) {
1752 PyErr_Clear();
1753 res = element_get_tail(self);
1754 } else if (strcmp(name, "attrib") == 0) {
1755 PyErr_Clear();
1756 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001757 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001758 res = element_get_attrib(self);
1759 }
1760
1761 if (!res)
1762 return NULL;
1763
1764 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001765 return res;
1766}
1767
Eli Benderskyb20df952012-05-20 06:33:29 +03001768static PyObject*
1769element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001770{
Eli Benderskyb20df952012-05-20 06:33:29 +03001771 char *name = "";
1772 if (PyUnicode_Check(nameobj))
1773 name = _PyUnicode_AsString(nameobj);
1774
1775 if (name == NULL)
1776 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001777
1778 if (strcmp(name, "tag") == 0) {
1779 Py_DECREF(self->tag);
1780 self->tag = value;
1781 Py_INCREF(self->tag);
1782 } else if (strcmp(name, "text") == 0) {
1783 Py_DECREF(JOIN_OBJ(self->text));
1784 self->text = value;
1785 Py_INCREF(self->text);
1786 } else if (strcmp(name, "tail") == 0) {
1787 Py_DECREF(JOIN_OBJ(self->tail));
1788 self->tail = value;
1789 Py_INCREF(self->tail);
1790 } else if (strcmp(name, "attrib") == 0) {
1791 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001792 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001793 Py_DECREF(self->extra->attrib);
1794 self->extra->attrib = value;
1795 Py_INCREF(self->extra->attrib);
1796 } else {
1797 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001798 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001799 }
1800
Eli Benderskyb20df952012-05-20 06:33:29 +03001801 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001802}
1803
1804static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001805 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001806 0, /* sq_concat */
1807 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001808 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001809 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001810 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001811 0,
1812};
1813
1814static PyMappingMethods element_as_mapping = {
1815 (lenfunc) element_length,
1816 (binaryfunc) element_subscr,
1817 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001818};
1819
Neal Norwitz227b5332006-03-22 09:28:35 +00001820static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001821 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001822 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001823 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001824 (destructor)element_dealloc, /* tp_dealloc */
1825 0, /* tp_print */
1826 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001827 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001828 0, /* tp_reserved */
1829 (reprfunc)element_repr, /* tp_repr */
1830 0, /* tp_as_number */
1831 &element_as_sequence, /* tp_as_sequence */
1832 &element_as_mapping, /* tp_as_mapping */
1833 0, /* tp_hash */
1834 0, /* tp_call */
1835 0, /* tp_str */
1836 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001837 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001838 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001839 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1840 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001841 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001842 (traverseproc)element_gc_traverse, /* tp_traverse */
1843 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001844 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001845 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001846 0, /* tp_iter */
1847 0, /* tp_iternext */
1848 element_methods, /* tp_methods */
1849 0, /* tp_members */
1850 0, /* tp_getset */
1851 0, /* tp_base */
1852 0, /* tp_dict */
1853 0, /* tp_descr_get */
1854 0, /* tp_descr_set */
1855 0, /* tp_dictoffset */
1856 (initproc)element_init, /* tp_init */
1857 PyType_GenericAlloc, /* tp_alloc */
1858 element_new, /* tp_new */
1859 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001860};
1861
Eli Bendersky64d11e62012-06-15 07:42:50 +03001862/******************************* Element iterator ****************************/
1863
1864/* ElementIterObject represents the iteration state over an XML element in
1865 * pre-order traversal. To keep track of which sub-element should be returned
1866 * next, a stack of parents is maintained. This is a standard stack-based
1867 * iterative pre-order traversal of a tree.
1868 * The stack is managed using a single-linked list starting at parent_stack.
1869 * Each stack node contains the saved parent to which we should return after
1870 * the current one is exhausted, and the next child to examine in that parent.
1871 */
1872typedef struct ParentLocator_t {
1873 ElementObject *parent;
1874 Py_ssize_t child_index;
1875 struct ParentLocator_t *next;
1876} ParentLocator;
1877
1878typedef struct {
1879 PyObject_HEAD
1880 ParentLocator *parent_stack;
1881 ElementObject *root_element;
1882 PyObject *sought_tag;
1883 int root_done;
1884 int gettext;
1885} ElementIterObject;
1886
1887
1888static void
1889elementiter_dealloc(ElementIterObject *it)
1890{
1891 ParentLocator *p = it->parent_stack;
1892 while (p) {
1893 ParentLocator *temp = p;
1894 Py_XDECREF(p->parent);
1895 p = p->next;
1896 PyObject_Free(temp);
1897 }
1898
1899 Py_XDECREF(it->sought_tag);
1900 Py_XDECREF(it->root_element);
1901
1902 PyObject_GC_UnTrack(it);
1903 PyObject_GC_Del(it);
1904}
1905
1906static int
1907elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1908{
1909 ParentLocator *p = it->parent_stack;
1910 while (p) {
1911 Py_VISIT(p->parent);
1912 p = p->next;
1913 }
1914
1915 Py_VISIT(it->root_element);
1916 Py_VISIT(it->sought_tag);
1917 return 0;
1918}
1919
1920/* Helper function for elementiter_next. Add a new parent to the parent stack.
1921 */
1922static ParentLocator *
1923parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1924{
1925 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1926 if (new_node) {
1927 new_node->parent = parent;
1928 Py_INCREF(parent);
1929 new_node->child_index = 0;
1930 new_node->next = stack;
1931 }
1932 return new_node;
1933}
1934
1935static PyObject *
1936elementiter_next(ElementIterObject *it)
1937{
1938 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08001939 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03001940 * A short note on gettext: this function serves both the iter() and
1941 * itertext() methods to avoid code duplication. However, there are a few
1942 * small differences in the way these iterations work. Namely:
1943 * - itertext() only yields text from nodes that have it, and continues
1944 * iterating when a node doesn't have text (so it doesn't return any
1945 * node like iter())
1946 * - itertext() also has to handle tail, after finishing with all the
1947 * children of a node.
1948 */
Eli Bendersky113da642012-06-15 07:52:49 +03001949 ElementObject *cur_parent;
1950 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001951
1952 while (1) {
1953 /* Handle the case reached in the beginning and end of iteration, where
1954 * the parent stack is empty. The root_done flag gives us indication
1955 * whether we've just started iterating (so root_done is 0), in which
1956 * case the root is returned. If root_done is 1 and we're here, the
1957 * iterator is exhausted.
1958 */
1959 if (!it->parent_stack->parent) {
1960 if (it->root_done) {
1961 PyErr_SetNone(PyExc_StopIteration);
1962 return NULL;
1963 } else {
1964 it->parent_stack = parent_stack_push_new(it->parent_stack,
1965 it->root_element);
1966 if (!it->parent_stack) {
1967 PyErr_NoMemory();
1968 return NULL;
1969 }
1970
1971 it->root_done = 1;
1972 if (it->sought_tag == Py_None ||
1973 PyObject_RichCompareBool(it->root_element->tag,
1974 it->sought_tag, Py_EQ) == 1) {
1975 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08001976 PyObject *text = element_get_text(it->root_element);
1977 if (!text)
1978 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001979 if (PyObject_IsTrue(text)) {
1980 Py_INCREF(text);
1981 return text;
1982 }
1983 } else {
1984 Py_INCREF(it->root_element);
1985 return (PyObject *)it->root_element;
1986 }
1987 }
1988 }
1989 }
1990
1991 /* See if there are children left to traverse in the current parent. If
1992 * yes, visit the next child. If not, pop the stack and try again.
1993 */
Eli Bendersky113da642012-06-15 07:52:49 +03001994 cur_parent = it->parent_stack->parent;
1995 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001996 if (cur_parent->extra && child_index < cur_parent->extra->length) {
1997 ElementObject *child = (ElementObject *)
1998 cur_parent->extra->children[child_index];
1999 it->parent_stack->child_index++;
2000 it->parent_stack = parent_stack_push_new(it->parent_stack,
2001 child);
2002 if (!it->parent_stack) {
2003 PyErr_NoMemory();
2004 return NULL;
2005 }
2006
2007 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002008 PyObject *text = element_get_text(child);
2009 if (!text)
2010 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002011 if (PyObject_IsTrue(text)) {
2012 Py_INCREF(text);
2013 return text;
2014 }
2015 } else if (it->sought_tag == Py_None ||
2016 PyObject_RichCompareBool(child->tag,
2017 it->sought_tag, Py_EQ) == 1) {
2018 Py_INCREF(child);
2019 return (PyObject *)child;
2020 }
2021 else
2022 continue;
2023 }
2024 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002025 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002026 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002027 if (it->gettext) {
2028 tail = element_get_tail(cur_parent);
2029 if (!tail)
2030 return NULL;
2031 }
2032 else
2033 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002034 Py_XDECREF(it->parent_stack->parent);
2035 PyObject_Free(it->parent_stack);
2036 it->parent_stack = next;
2037
2038 /* Note that extra condition on it->parent_stack->parent here;
2039 * this is because itertext() is supposed to only return *inner*
2040 * text, not text following the element it began iteration with.
2041 */
2042 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2043 Py_INCREF(tail);
2044 return tail;
2045 }
2046 }
2047 }
2048
2049 return NULL;
2050}
2051
2052
2053static PyTypeObject ElementIter_Type = {
2054 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002055 /* Using the module's name since the pure-Python implementation does not
2056 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002057 "_elementtree._element_iterator", /* tp_name */
2058 sizeof(ElementIterObject), /* tp_basicsize */
2059 0, /* tp_itemsize */
2060 /* methods */
2061 (destructor)elementiter_dealloc, /* tp_dealloc */
2062 0, /* tp_print */
2063 0, /* tp_getattr */
2064 0, /* tp_setattr */
2065 0, /* tp_reserved */
2066 0, /* tp_repr */
2067 0, /* tp_as_number */
2068 0, /* tp_as_sequence */
2069 0, /* tp_as_mapping */
2070 0, /* tp_hash */
2071 0, /* tp_call */
2072 0, /* tp_str */
2073 0, /* tp_getattro */
2074 0, /* tp_setattro */
2075 0, /* tp_as_buffer */
2076 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2077 0, /* tp_doc */
2078 (traverseproc)elementiter_traverse, /* tp_traverse */
2079 0, /* tp_clear */
2080 0, /* tp_richcompare */
2081 0, /* tp_weaklistoffset */
2082 PyObject_SelfIter, /* tp_iter */
2083 (iternextfunc)elementiter_next, /* tp_iternext */
2084 0, /* tp_methods */
2085 0, /* tp_members */
2086 0, /* tp_getset */
2087 0, /* tp_base */
2088 0, /* tp_dict */
2089 0, /* tp_descr_get */
2090 0, /* tp_descr_set */
2091 0, /* tp_dictoffset */
2092 0, /* tp_init */
2093 0, /* tp_alloc */
2094 0, /* tp_new */
2095};
2096
2097
2098static PyObject *
2099create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2100{
2101 ElementIterObject *it;
2102 PyObject *star = NULL;
2103
2104 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2105 if (!it)
2106 return NULL;
2107 if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
2108 PyObject_GC_Del(it);
2109 return NULL;
2110 }
2111
2112 it->parent_stack->parent = NULL;
2113 it->parent_stack->child_index = 0;
2114 it->parent_stack->next = NULL;
2115
2116 if (PyUnicode_Check(tag))
2117 star = PyUnicode_FromString("*");
2118 else if (PyBytes_Check(tag))
2119 star = PyBytes_FromString("*");
2120
2121 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2122 tag = Py_None;
2123
2124 Py_XDECREF(star);
2125 it->sought_tag = tag;
2126 it->root_done = 0;
2127 it->gettext = gettext;
2128 it->root_element = self;
2129
2130 Py_INCREF(self);
2131 Py_INCREF(tag);
2132
2133 PyObject_GC_Track(it);
2134 return (PyObject *)it;
2135}
2136
2137
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002138/* ==================================================================== */
2139/* the tree builder type */
2140
2141typedef struct {
2142 PyObject_HEAD
2143
Eli Bendersky58d548d2012-05-29 15:45:16 +03002144 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002145
Antoine Pitrouee329312012-10-04 19:53:29 +02002146 PyObject *this; /* current node */
2147 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002148
Eli Bendersky58d548d2012-05-29 15:45:16 +03002149 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002150
Eli Bendersky58d548d2012-05-29 15:45:16 +03002151 PyObject *stack; /* element stack */
2152 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002153
Eli Bendersky48d358b2012-05-30 17:57:50 +03002154 PyObject *element_factory;
2155
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002156 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002157 PyObject *events; /* list of events, or NULL if not collecting */
2158 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2159 PyObject *end_event_obj;
2160 PyObject *start_ns_event_obj;
2161 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002162} TreeBuilderObject;
2163
Neal Norwitz227b5332006-03-22 09:28:35 +00002164static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002165
Christian Heimes90aa7642007-12-19 02:45:37 +00002166#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002167
2168/* -------------------------------------------------------------------- */
2169/* constructor and destructor */
2170
Eli Bendersky58d548d2012-05-29 15:45:16 +03002171static PyObject *
2172treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002173{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002174 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2175 if (t != NULL) {
2176 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002177
Eli Bendersky58d548d2012-05-29 15:45:16 +03002178 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002179 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002180 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002181 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002182
Eli Bendersky58d548d2012-05-29 15:45:16 +03002183 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002184 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002185 t->stack = PyList_New(20);
2186 if (!t->stack) {
2187 Py_DECREF(t->this);
2188 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002189 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002190 return NULL;
2191 }
2192 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002193
Eli Bendersky58d548d2012-05-29 15:45:16 +03002194 t->events = NULL;
2195 t->start_event_obj = t->end_event_obj = NULL;
2196 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2197 }
2198 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002199}
2200
Eli Bendersky58d548d2012-05-29 15:45:16 +03002201static int
2202treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002203{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002204 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002205 PyObject *element_factory = NULL;
2206 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002207 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002208
2209 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2210 &element_factory)) {
2211 return -1;
2212 }
2213
2214 if (element_factory) {
2215 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002216 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002217 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002218 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002219 }
2220
Eli Bendersky58d548d2012-05-29 15:45:16 +03002221 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002222}
2223
Eli Bendersky48d358b2012-05-30 17:57:50 +03002224static int
2225treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2226{
2227 Py_VISIT(self->root);
2228 Py_VISIT(self->this);
2229 Py_VISIT(self->last);
2230 Py_VISIT(self->data);
2231 Py_VISIT(self->stack);
2232 Py_VISIT(self->element_factory);
2233 return 0;
2234}
2235
2236static int
2237treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002238{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002239 Py_CLEAR(self->end_ns_event_obj);
2240 Py_CLEAR(self->start_ns_event_obj);
2241 Py_CLEAR(self->end_event_obj);
2242 Py_CLEAR(self->start_event_obj);
2243 Py_CLEAR(self->events);
2244 Py_CLEAR(self->stack);
2245 Py_CLEAR(self->data);
2246 Py_CLEAR(self->last);
2247 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002248 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002249 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002250 return 0;
2251}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002252
Eli Bendersky48d358b2012-05-30 17:57:50 +03002253static void
2254treebuilder_dealloc(TreeBuilderObject *self)
2255{
2256 PyObject_GC_UnTrack(self);
2257 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002258 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002259}
2260
2261/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002262/* helpers for handling of arbitrary element-like objects */
2263
2264static int
2265treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2266 PyObject **dest, _Py_Identifier *name)
2267{
2268 if (Element_CheckExact(element)) {
2269 Py_DECREF(JOIN_OBJ(*dest));
2270 *dest = JOIN_SET(data, PyList_CheckExact(data));
2271 return 0;
2272 }
2273 else {
2274 PyObject *joined = list_join(data);
2275 int r;
2276 if (joined == NULL)
2277 return -1;
2278 r = _PyObject_SetAttrId(element, name, joined);
2279 Py_DECREF(joined);
2280 return r;
2281 }
2282}
2283
2284/* These two functions steal a reference to data */
2285static int
2286treebuilder_set_element_text(PyObject *element, PyObject *data)
2287{
2288 _Py_IDENTIFIER(text);
2289 return treebuilder_set_element_text_or_tail(
2290 element, data, &((ElementObject *) element)->text, &PyId_text);
2291}
2292
2293static int
2294treebuilder_set_element_tail(PyObject *element, PyObject *data)
2295{
2296 _Py_IDENTIFIER(tail);
2297 return treebuilder_set_element_text_or_tail(
2298 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2299}
2300
2301static int
2302treebuilder_add_subelement(PyObject *element, PyObject *child)
2303{
2304 _Py_IDENTIFIER(append);
2305 if (Element_CheckExact(element)) {
2306 ElementObject *elem = (ElementObject *) element;
2307 return element_add_subelement(elem, child);
2308 }
2309 else {
2310 PyObject *res;
2311 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2312 if (res == NULL)
2313 return -1;
2314 Py_DECREF(res);
2315 return 0;
2316 }
2317}
2318
2319/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002320/* handlers */
2321
2322LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002323treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2324 PyObject* attrib)
2325{
2326 PyObject* node;
2327 PyObject* this;
2328
2329 if (self->data) {
2330 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002331 if (treebuilder_set_element_text(self->last, self->data))
2332 return NULL;
2333 }
2334 else {
2335 if (treebuilder_set_element_tail(self->last, self->data))
2336 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002337 }
2338 self->data = NULL;
2339 }
2340
Eli Bendersky48d358b2012-05-30 17:57:50 +03002341 if (self->element_factory) {
2342 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2343 } else {
2344 node = create_new_element(tag, attrib);
2345 }
2346 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002347 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002348 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002349
Antoine Pitrouee329312012-10-04 19:53:29 +02002350 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002351
2352 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002353 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002354 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002355 } else {
2356 if (self->root) {
2357 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002358 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002359 "multiple elements on top level"
2360 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002361 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002362 }
2363 Py_INCREF(node);
2364 self->root = node;
2365 }
2366
2367 if (self->index < PyList_GET_SIZE(self->stack)) {
2368 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002369 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002370 Py_INCREF(this);
2371 } else {
2372 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002373 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002374 }
2375 self->index++;
2376
2377 Py_DECREF(this);
2378 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002379 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002380
2381 Py_DECREF(self->last);
2382 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002383 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002384
2385 if (self->start_event_obj) {
2386 PyObject* res;
2387 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002388 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002389 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002390 PyList_Append(self->events, res);
2391 Py_DECREF(res);
2392 } else
2393 PyErr_Clear(); /* FIXME: propagate error */
2394 }
2395
2396 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002397
2398 error:
2399 Py_DECREF(node);
2400 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002401}
2402
2403LOCAL(PyObject*)
2404treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2405{
2406 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002407 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002408 /* ignore calls to data before the first call to start */
2409 Py_RETURN_NONE;
2410 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002411 /* store the first item as is */
2412 Py_INCREF(data); self->data = data;
2413 } else {
2414 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002415 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2416 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002417 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002418 /* expat often generates single character data sections; handle
2419 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002420 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2421 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002422 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002423 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002424 } else if (PyList_CheckExact(self->data)) {
2425 if (PyList_Append(self->data, data) < 0)
2426 return NULL;
2427 } else {
2428 PyObject* list = PyList_New(2);
2429 if (!list)
2430 return NULL;
2431 PyList_SET_ITEM(list, 0, self->data);
2432 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2433 self->data = list;
2434 }
2435 }
2436
2437 Py_RETURN_NONE;
2438}
2439
2440LOCAL(PyObject*)
2441treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2442{
2443 PyObject* item;
2444
2445 if (self->data) {
2446 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002447 if (treebuilder_set_element_text(self->last, self->data))
2448 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002449 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002450 if (treebuilder_set_element_tail(self->last, self->data))
2451 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002452 }
2453 self->data = NULL;
2454 }
2455
2456 if (self->index == 0) {
2457 PyErr_SetString(
2458 PyExc_IndexError,
2459 "pop from empty stack"
2460 );
2461 return NULL;
2462 }
2463
2464 self->index--;
2465
2466 item = PyList_GET_ITEM(self->stack, self->index);
2467 Py_INCREF(item);
2468
2469 Py_DECREF(self->last);
2470
Antoine Pitrouee329312012-10-04 19:53:29 +02002471 self->last = self->this;
2472 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002473
2474 if (self->end_event_obj) {
2475 PyObject* res;
2476 PyObject* action = self->end_event_obj;
2477 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002478 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002479 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002480 PyList_Append(self->events, res);
2481 Py_DECREF(res);
2482 } else
2483 PyErr_Clear(); /* FIXME: propagate error */
2484 }
2485
2486 Py_INCREF(self->last);
2487 return (PyObject*) self->last;
2488}
2489
2490LOCAL(void)
2491treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002492 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002493{
2494 PyObject* res;
2495 PyObject* action;
2496 PyObject* parcel;
2497
2498 if (!self->events)
2499 return;
2500
2501 if (start) {
2502 if (!self->start_ns_event_obj)
2503 return;
2504 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002505 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002506 if (!parcel)
2507 return;
2508 Py_INCREF(action);
2509 } else {
2510 if (!self->end_ns_event_obj)
2511 return;
2512 action = self->end_ns_event_obj;
2513 Py_INCREF(action);
2514 parcel = Py_None;
2515 Py_INCREF(parcel);
2516 }
2517
2518 res = PyTuple_New(2);
2519
2520 if (res) {
2521 PyTuple_SET_ITEM(res, 0, action);
2522 PyTuple_SET_ITEM(res, 1, parcel);
2523 PyList_Append(self->events, res);
2524 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002525 }
2526 else {
2527 Py_DECREF(action);
2528 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002529 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002530 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002531}
2532
2533/* -------------------------------------------------------------------- */
2534/* methods (in alphabetical order) */
2535
2536static PyObject*
2537treebuilder_data(TreeBuilderObject* self, PyObject* args)
2538{
2539 PyObject* data;
2540 if (!PyArg_ParseTuple(args, "O:data", &data))
2541 return NULL;
2542
2543 return treebuilder_handle_data(self, data);
2544}
2545
2546static PyObject*
2547treebuilder_end(TreeBuilderObject* self, PyObject* args)
2548{
2549 PyObject* tag;
2550 if (!PyArg_ParseTuple(args, "O:end", &tag))
2551 return NULL;
2552
2553 return treebuilder_handle_end(self, tag);
2554}
2555
2556LOCAL(PyObject*)
2557treebuilder_done(TreeBuilderObject* self)
2558{
2559 PyObject* res;
2560
2561 /* FIXME: check stack size? */
2562
2563 if (self->root)
2564 res = self->root;
2565 else
2566 res = Py_None;
2567
2568 Py_INCREF(res);
2569 return res;
2570}
2571
2572static PyObject*
2573treebuilder_close(TreeBuilderObject* self, PyObject* args)
2574{
2575 if (!PyArg_ParseTuple(args, ":close"))
2576 return NULL;
2577
2578 return treebuilder_done(self);
2579}
2580
2581static PyObject*
2582treebuilder_start(TreeBuilderObject* self, PyObject* args)
2583{
2584 PyObject* tag;
2585 PyObject* attrib = Py_None;
2586 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2587 return NULL;
2588
2589 return treebuilder_handle_start(self, tag, attrib);
2590}
2591
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002592static PyMethodDef treebuilder_methods[] = {
2593 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2594 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2595 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002596 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2597 {NULL, NULL}
2598};
2599
Neal Norwitz227b5332006-03-22 09:28:35 +00002600static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002601 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002602 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002603 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002604 (destructor)treebuilder_dealloc, /* tp_dealloc */
2605 0, /* tp_print */
2606 0, /* tp_getattr */
2607 0, /* tp_setattr */
2608 0, /* tp_reserved */
2609 0, /* tp_repr */
2610 0, /* tp_as_number */
2611 0, /* tp_as_sequence */
2612 0, /* tp_as_mapping */
2613 0, /* tp_hash */
2614 0, /* tp_call */
2615 0, /* tp_str */
2616 0, /* tp_getattro */
2617 0, /* tp_setattro */
2618 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002619 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2620 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002621 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002622 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2623 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002624 0, /* tp_richcompare */
2625 0, /* tp_weaklistoffset */
2626 0, /* tp_iter */
2627 0, /* tp_iternext */
2628 treebuilder_methods, /* tp_methods */
2629 0, /* tp_members */
2630 0, /* tp_getset */
2631 0, /* tp_base */
2632 0, /* tp_dict */
2633 0, /* tp_descr_get */
2634 0, /* tp_descr_set */
2635 0, /* tp_dictoffset */
2636 (initproc)treebuilder_init, /* tp_init */
2637 PyType_GenericAlloc, /* tp_alloc */
2638 treebuilder_new, /* tp_new */
2639 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002640};
2641
2642/* ==================================================================== */
2643/* the expat interface */
2644
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002645#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002646#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002647static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002648#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002649
Eli Bendersky52467b12012-06-01 07:13:08 +03002650static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2651 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2652
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002653typedef struct {
2654 PyObject_HEAD
2655
2656 XML_Parser parser;
2657
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002658 PyObject *target;
2659 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002660
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002661 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002662
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002663 PyObject *handle_start;
2664 PyObject *handle_data;
2665 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002666
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002667 PyObject *handle_comment;
2668 PyObject *handle_pi;
2669 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002670
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002671 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002672
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002673} XMLParserObject;
2674
Neal Norwitz227b5332006-03-22 09:28:35 +00002675static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002676
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002677#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2678
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002679/* helpers */
2680
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002681LOCAL(PyObject*)
2682makeuniversal(XMLParserObject* self, const char* string)
2683{
2684 /* convert a UTF-8 tag/attribute name from the expat parser
2685 to a universal name string */
2686
Antoine Pitrouc1948842012-10-01 23:40:37 +02002687 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002688 PyObject* key;
2689 PyObject* value;
2690
2691 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002692 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002693 if (!key)
2694 return NULL;
2695
2696 value = PyDict_GetItem(self->names, key);
2697
2698 if (value) {
2699 Py_INCREF(value);
2700 } else {
2701 /* new name. convert to universal name, and decode as
2702 necessary */
2703
2704 PyObject* tag;
2705 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002706 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707
2708 /* look for namespace separator */
2709 for (i = 0; i < size; i++)
2710 if (string[i] == '}')
2711 break;
2712 if (i != size) {
2713 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002714 tag = PyBytes_FromStringAndSize(NULL, size+1);
2715 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716 p[0] = '{';
2717 memcpy(p+1, string, size);
2718 size++;
2719 } else {
2720 /* plain name; use key as tag */
2721 Py_INCREF(key);
2722 tag = key;
2723 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002724
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002725 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002726 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002727 value = PyUnicode_DecodeUTF8(p, size, "strict");
2728 Py_DECREF(tag);
2729 if (!value) {
2730 Py_DECREF(key);
2731 return NULL;
2732 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002733
2734 /* add to names dictionary */
2735 if (PyDict_SetItem(self->names, key, value) < 0) {
2736 Py_DECREF(key);
2737 Py_DECREF(value);
2738 return NULL;
2739 }
2740 }
2741
2742 Py_DECREF(key);
2743 return value;
2744}
2745
Eli Bendersky5b77d812012-03-16 08:20:05 +02002746/* Set the ParseError exception with the given parameters.
2747 * If message is not NULL, it's used as the error string. Otherwise, the
2748 * message string is the default for the given error_code.
2749*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002750static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002751expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002752{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002753 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002754
Victor Stinner499dfcf2011-03-21 13:26:24 +01002755 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002756 message ? message : EXPAT(ErrorString)(error_code),
2757 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002758 if (errmsg == NULL)
2759 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002760
Victor Stinner499dfcf2011-03-21 13:26:24 +01002761 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2762 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002763 if (!error)
2764 return;
2765
Eli Bendersky5b77d812012-03-16 08:20:05 +02002766 /* Add code and position attributes */
2767 code = PyLong_FromLong((long)error_code);
2768 if (!code) {
2769 Py_DECREF(error);
2770 return;
2771 }
2772 if (PyObject_SetAttrString(error, "code", code) == -1) {
2773 Py_DECREF(error);
2774 Py_DECREF(code);
2775 return;
2776 }
2777 Py_DECREF(code);
2778
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002779 position = Py_BuildValue("(ii)", line, column);
2780 if (!position) {
2781 Py_DECREF(error);
2782 return;
2783 }
2784 if (PyObject_SetAttrString(error, "position", position) == -1) {
2785 Py_DECREF(error);
2786 Py_DECREF(position);
2787 return;
2788 }
2789 Py_DECREF(position);
2790
2791 PyErr_SetObject(elementtree_parseerror_obj, error);
2792 Py_DECREF(error);
2793}
2794
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002795/* -------------------------------------------------------------------- */
2796/* handlers */
2797
2798static void
2799expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2800 int data_len)
2801{
2802 PyObject* key;
2803 PyObject* value;
2804 PyObject* res;
2805
2806 if (data_len < 2 || data_in[0] != '&')
2807 return;
2808
Neal Norwitz0269b912007-08-08 06:56:02 +00002809 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002810 if (!key)
2811 return;
2812
2813 value = PyDict_GetItem(self->entity, key);
2814
2815 if (value) {
2816 if (TreeBuilder_CheckExact(self->target))
2817 res = treebuilder_handle_data(
2818 (TreeBuilderObject*) self->target, value
2819 );
2820 else if (self->handle_data)
2821 res = PyObject_CallFunction(self->handle_data, "O", value);
2822 else
2823 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002824 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002825 } else if (!PyErr_Occurred()) {
2826 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002827 char message[128] = "undefined entity ";
2828 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002829 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002830 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002831 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002832 EXPAT(GetErrorColumnNumber)(self->parser),
2833 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002834 );
2835 }
2836
2837 Py_DECREF(key);
2838}
2839
2840static void
2841expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2842 const XML_Char **attrib_in)
2843{
2844 PyObject* res;
2845 PyObject* tag;
2846 PyObject* attrib;
2847 int ok;
2848
2849 /* tag name */
2850 tag = makeuniversal(self, tag_in);
2851 if (!tag)
2852 return; /* parser will look for errors */
2853
2854 /* attributes */
2855 if (attrib_in[0]) {
2856 attrib = PyDict_New();
2857 if (!attrib)
2858 return;
2859 while (attrib_in[0] && attrib_in[1]) {
2860 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002861 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002862 if (!key || !value) {
2863 Py_XDECREF(value);
2864 Py_XDECREF(key);
2865 Py_DECREF(attrib);
2866 return;
2867 }
2868 ok = PyDict_SetItem(attrib, key, value);
2869 Py_DECREF(value);
2870 Py_DECREF(key);
2871 if (ok < 0) {
2872 Py_DECREF(attrib);
2873 return;
2874 }
2875 attrib_in += 2;
2876 }
2877 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002878 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002879 attrib = PyDict_New();
2880 if (!attrib)
2881 return;
2882 }
2883
2884 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002885 /* shortcut */
2886 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2887 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002888 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002889 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002890 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002891 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002892 res = NULL;
2893
2894 Py_DECREF(tag);
2895 Py_DECREF(attrib);
2896
2897 Py_XDECREF(res);
2898}
2899
2900static void
2901expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2902 int data_len)
2903{
2904 PyObject* data;
2905 PyObject* res;
2906
Neal Norwitz0269b912007-08-08 06:56:02 +00002907 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002908 if (!data)
2909 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002910
2911 if (TreeBuilder_CheckExact(self->target))
2912 /* shortcut */
2913 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2914 else if (self->handle_data)
2915 res = PyObject_CallFunction(self->handle_data, "O", data);
2916 else
2917 res = NULL;
2918
2919 Py_DECREF(data);
2920
2921 Py_XDECREF(res);
2922}
2923
2924static void
2925expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2926{
2927 PyObject* tag;
2928 PyObject* res = NULL;
2929
2930 if (TreeBuilder_CheckExact(self->target))
2931 /* shortcut */
2932 /* the standard tree builder doesn't look at the end tag */
2933 res = treebuilder_handle_end(
2934 (TreeBuilderObject*) self->target, Py_None
2935 );
2936 else if (self->handle_end) {
2937 tag = makeuniversal(self, tag_in);
2938 if (tag) {
2939 res = PyObject_CallFunction(self->handle_end, "O", tag);
2940 Py_DECREF(tag);
2941 }
2942 }
2943
2944 Py_XDECREF(res);
2945}
2946
2947static void
2948expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2949 const XML_Char *uri)
2950{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002951 PyObject* sprefix = NULL;
2952 PyObject* suri = NULL;
2953
2954 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2955 if (!suri)
2956 return;
2957
2958 if (prefix)
2959 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2960 else
2961 sprefix = PyUnicode_FromString("");
2962 if (!sprefix) {
2963 Py_DECREF(suri);
2964 return;
2965 }
2966
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002967 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002968 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002969 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002970
2971 Py_DECREF(sprefix);
2972 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002973}
2974
2975static void
2976expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2977{
2978 treebuilder_handle_namespace(
2979 (TreeBuilderObject*) self->target, 0, NULL, NULL
2980 );
2981}
2982
2983static void
2984expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2985{
2986 PyObject* comment;
2987 PyObject* res;
2988
2989 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002990 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002991 if (comment) {
2992 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2993 Py_XDECREF(res);
2994 Py_DECREF(comment);
2995 }
2996 }
2997}
2998
Eli Bendersky45839902013-01-13 05:14:47 -08002999static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003000expat_start_doctype_handler(XMLParserObject *self,
3001 const XML_Char *doctype_name,
3002 const XML_Char *sysid,
3003 const XML_Char *pubid,
3004 int has_internal_subset)
3005{
3006 PyObject *self_pyobj = (PyObject *)self;
3007 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3008 PyObject *parser_doctype = NULL;
3009 PyObject *res = NULL;
3010
3011 doctype_name_obj = makeuniversal(self, doctype_name);
3012 if (!doctype_name_obj)
3013 return;
3014
3015 if (sysid) {
3016 sysid_obj = makeuniversal(self, sysid);
3017 if (!sysid_obj) {
3018 Py_DECREF(doctype_name_obj);
3019 return;
3020 }
3021 } else {
3022 Py_INCREF(Py_None);
3023 sysid_obj = Py_None;
3024 }
3025
3026 if (pubid) {
3027 pubid_obj = makeuniversal(self, pubid);
3028 if (!pubid_obj) {
3029 Py_DECREF(doctype_name_obj);
3030 Py_DECREF(sysid_obj);
3031 return;
3032 }
3033 } else {
3034 Py_INCREF(Py_None);
3035 pubid_obj = Py_None;
3036 }
3037
3038 /* If the target has a handler for doctype, call it. */
3039 if (self->handle_doctype) {
3040 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3041 doctype_name_obj, pubid_obj, sysid_obj);
3042 Py_CLEAR(res);
3043 }
3044
3045 /* Now see if the parser itself has a doctype method. If yes and it's
3046 * a subclass, call it but warn about deprecation. If it's not a subclass
3047 * (i.e. vanilla XMLParser), do nothing.
3048 */
3049 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3050 if (parser_doctype) {
3051 if (!XMLParser_CheckExact(self_pyobj)) {
3052 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3053 "This method of XMLParser is deprecated. Define"
3054 " doctype() method on the TreeBuilder target.",
3055 1) < 0) {
3056 goto clear;
3057 }
3058 res = PyObject_CallFunction(parser_doctype, "OOO",
3059 doctype_name_obj, pubid_obj, sysid_obj);
3060 Py_CLEAR(res);
3061 }
3062 }
3063
3064clear:
3065 Py_XDECREF(parser_doctype);
3066 Py_DECREF(doctype_name_obj);
3067 Py_DECREF(pubid_obj);
3068 Py_DECREF(sysid_obj);
3069}
3070
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003071static void
3072expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3073 const XML_Char* data_in)
3074{
3075 PyObject* target;
3076 PyObject* data;
3077 PyObject* res;
3078
3079 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003080 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3081 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082 if (target && data) {
3083 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3084 Py_XDECREF(res);
3085 Py_DECREF(data);
3086 Py_DECREF(target);
3087 } else {
3088 Py_XDECREF(data);
3089 Py_XDECREF(target);
3090 }
3091 }
3092}
3093
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003094static int
3095expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
3096 XML_Encoding *info)
3097{
3098 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003099 unsigned char s[256];
3100 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003101 void *data;
3102 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003103
3104 memset(info, 0, sizeof(XML_Encoding));
3105
3106 for (i = 0; i < 256; i++)
3107 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003108
Fredrik Lundhc3389992005-12-25 11:40:19 +00003109 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003110 if (!u)
3111 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003112 if (PyUnicode_READY(u))
3113 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003114
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003115 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003116 Py_DECREF(u);
3117 return XML_STATUS_ERROR;
3118 }
3119
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003120 kind = PyUnicode_KIND(u);
3121 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003122 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003123 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
3124 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
3125 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003126 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003127 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003128 }
3129
3130 Py_DECREF(u);
3131
3132 return XML_STATUS_OK;
3133}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003134
3135/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003136
Eli Bendersky52467b12012-06-01 07:13:08 +03003137static PyObject *
3138xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003139{
Eli Bendersky52467b12012-06-01 07:13:08 +03003140 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3141 if (self) {
3142 self->parser = NULL;
3143 self->target = self->entity = self->names = NULL;
3144 self->handle_start = self->handle_data = self->handle_end = NULL;
3145 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003146 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003147 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003148 return (PyObject *)self;
3149}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003150
Eli Bendersky52467b12012-06-01 07:13:08 +03003151static int
3152xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3153{
3154 XMLParserObject *self_xp = (XMLParserObject *)self;
3155 PyObject *target = NULL, *html = NULL;
3156 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003157 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003158
Eli Bendersky52467b12012-06-01 07:13:08 +03003159 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3160 &html, &target, &encoding)) {
3161 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003162 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003163
Eli Bendersky52467b12012-06-01 07:13:08 +03003164 self_xp->entity = PyDict_New();
3165 if (!self_xp->entity)
3166 return -1;
3167
3168 self_xp->names = PyDict_New();
3169 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003170 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003171 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003172 }
3173
Eli Bendersky52467b12012-06-01 07:13:08 +03003174 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3175 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003176 Py_CLEAR(self_xp->entity);
3177 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003178 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003179 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003180 }
3181
Eli Bendersky52467b12012-06-01 07:13:08 +03003182 if (target) {
3183 Py_INCREF(target);
3184 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003185 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003186 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003187 Py_CLEAR(self_xp->entity);
3188 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003189 EXPAT(ParserFree)(self_xp->parser);
3190 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003191 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003192 }
3193 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003194
Eli Bendersky52467b12012-06-01 07:13:08 +03003195 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3196 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3197 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3198 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3199 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3200 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003201 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003202
3203 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003205 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003206 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003207 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003208 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003209 (XML_StartElementHandler) expat_start_handler,
3210 (XML_EndElementHandler) expat_end_handler
3211 );
3212 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003213 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003214 (XML_DefaultHandler) expat_default_handler
3215 );
3216 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003217 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003218 (XML_CharacterDataHandler) expat_data_handler
3219 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003220 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003221 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003222 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003223 (XML_CommentHandler) expat_comment_handler
3224 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003225 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003226 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003227 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003228 (XML_ProcessingInstructionHandler) expat_pi_handler
3229 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003230 EXPAT(SetStartDoctypeDeclHandler)(
3231 self_xp->parser,
3232 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3233 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003234 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003235 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003236 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
3237 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003238
Eli Bendersky52467b12012-06-01 07:13:08 +03003239 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003240}
3241
Eli Bendersky52467b12012-06-01 07:13:08 +03003242static int
3243xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3244{
3245 Py_VISIT(self->handle_close);
3246 Py_VISIT(self->handle_pi);
3247 Py_VISIT(self->handle_comment);
3248 Py_VISIT(self->handle_end);
3249 Py_VISIT(self->handle_data);
3250 Py_VISIT(self->handle_start);
3251
3252 Py_VISIT(self->target);
3253 Py_VISIT(self->entity);
3254 Py_VISIT(self->names);
3255
3256 return 0;
3257}
3258
3259static int
3260xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003261{
3262 EXPAT(ParserFree)(self->parser);
3263
Antoine Pitrouc1948842012-10-01 23:40:37 +02003264 Py_CLEAR(self->handle_close);
3265 Py_CLEAR(self->handle_pi);
3266 Py_CLEAR(self->handle_comment);
3267 Py_CLEAR(self->handle_end);
3268 Py_CLEAR(self->handle_data);
3269 Py_CLEAR(self->handle_start);
3270 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271
Antoine Pitrouc1948842012-10-01 23:40:37 +02003272 Py_CLEAR(self->target);
3273 Py_CLEAR(self->entity);
3274 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003275
Eli Bendersky52467b12012-06-01 07:13:08 +03003276 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003277}
3278
Eli Bendersky52467b12012-06-01 07:13:08 +03003279static void
3280xmlparser_dealloc(XMLParserObject* self)
3281{
3282 PyObject_GC_UnTrack(self);
3283 xmlparser_gc_clear(self);
3284 Py_TYPE(self)->tp_free((PyObject *)self);
3285}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286
3287LOCAL(PyObject*)
3288expat_parse(XMLParserObject* self, char* data, int data_len, int final)
3289{
3290 int ok;
3291
3292 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3293
3294 if (PyErr_Occurred())
3295 return NULL;
3296
3297 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003298 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003299 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003300 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003301 EXPAT(GetErrorColumnNumber)(self->parser),
3302 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003303 );
3304 return NULL;
3305 }
3306
3307 Py_RETURN_NONE;
3308}
3309
3310static PyObject*
3311xmlparser_close(XMLParserObject* self, PyObject* args)
3312{
3313 /* end feeding data to parser */
3314
3315 PyObject* res;
3316 if (!PyArg_ParseTuple(args, ":close"))
3317 return NULL;
3318
3319 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003320 if (!res)
3321 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003322
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003323 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003324 Py_DECREF(res);
3325 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003326 } if (self->handle_close) {
3327 Py_DECREF(res);
3328 return PyObject_CallFunction(self->handle_close, "");
3329 } else
3330 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003331}
3332
3333static PyObject*
3334xmlparser_feed(XMLParserObject* self, PyObject* args)
3335{
3336 /* feed data to parser */
3337
3338 char* data;
3339 int data_len;
3340 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
3341 return NULL;
3342
3343 return expat_parse(self, data, data_len, 0);
3344}
3345
3346static PyObject*
3347xmlparser_parse(XMLParserObject* self, PyObject* args)
3348{
3349 /* (internal) parse until end of input stream */
3350
3351 PyObject* reader;
3352 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003353 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003354 PyObject* res;
3355
3356 PyObject* fileobj;
3357 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3358 return NULL;
3359
3360 reader = PyObject_GetAttrString(fileobj, "read");
3361 if (!reader)
3362 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003363
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003364 /* read from open file object */
3365 for (;;) {
3366
3367 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3368
3369 if (!buffer) {
3370 /* read failed (e.g. due to KeyboardInterrupt) */
3371 Py_DECREF(reader);
3372 return NULL;
3373 }
3374
Eli Benderskyf996e772012-03-16 05:53:30 +02003375 if (PyUnicode_CheckExact(buffer)) {
3376 /* A unicode object is encoded into bytes using UTF-8 */
3377 if (PyUnicode_GET_SIZE(buffer) == 0) {
3378 Py_DECREF(buffer);
3379 break;
3380 }
3381 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003382 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003383 if (!temp) {
3384 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003385 Py_DECREF(reader);
3386 return NULL;
3387 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003388 buffer = temp;
3389 }
3390 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003391 Py_DECREF(buffer);
3392 break;
3393 }
3394
3395 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003396 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003397 );
3398
3399 Py_DECREF(buffer);
3400
3401 if (!res) {
3402 Py_DECREF(reader);
3403 return NULL;
3404 }
3405 Py_DECREF(res);
3406
3407 }
3408
3409 Py_DECREF(reader);
3410
3411 res = expat_parse(self, "", 0, 1);
3412
3413 if (res && TreeBuilder_CheckExact(self->target)) {
3414 Py_DECREF(res);
3415 return treebuilder_done((TreeBuilderObject*) self->target);
3416 }
3417
3418 return res;
3419}
3420
3421static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003422xmlparser_doctype(XMLParserObject *self, PyObject *args)
3423{
3424 Py_RETURN_NONE;
3425}
3426
3427static PyObject*
3428xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003429{
3430 /* activate element event reporting */
3431
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003432 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003433 TreeBuilderObject* target;
3434
3435 PyObject* events; /* event collector */
3436 PyObject* event_set = Py_None;
3437 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
3438 &event_set))
3439 return NULL;
3440
3441 if (!TreeBuilder_CheckExact(self->target)) {
3442 PyErr_SetString(
3443 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003444 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003445 "targets"
3446 );
3447 return NULL;
3448 }
3449
3450 target = (TreeBuilderObject*) self->target;
3451
3452 Py_INCREF(events);
3453 Py_XDECREF(target->events);
3454 target->events = events;
3455
3456 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003457 Py_CLEAR(target->start_event_obj);
3458 Py_CLEAR(target->end_event_obj);
3459 Py_CLEAR(target->start_ns_event_obj);
3460 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003461
3462 if (event_set == Py_None) {
3463 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003464 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003465 Py_RETURN_NONE;
3466 }
3467
3468 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3469 goto error;
3470
3471 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3472 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3473 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003474 if (PyUnicode_Check(item)) {
3475 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003476 if (event == NULL)
3477 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003478 } else if (PyBytes_Check(item))
3479 event = PyBytes_AS_STRING(item);
3480 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003481 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003482 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003483 if (strcmp(event, "start") == 0) {
3484 Py_INCREF(item);
3485 target->start_event_obj = item;
3486 } else if (strcmp(event, "end") == 0) {
3487 Py_INCREF(item);
3488 Py_XDECREF(target->end_event_obj);
3489 target->end_event_obj = item;
3490 } else if (strcmp(event, "start-ns") == 0) {
3491 Py_INCREF(item);
3492 Py_XDECREF(target->start_ns_event_obj);
3493 target->start_ns_event_obj = item;
3494 EXPAT(SetNamespaceDeclHandler)(
3495 self->parser,
3496 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3497 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3498 );
3499 } else if (strcmp(event, "end-ns") == 0) {
3500 Py_INCREF(item);
3501 Py_XDECREF(target->end_ns_event_obj);
3502 target->end_ns_event_obj = item;
3503 EXPAT(SetNamespaceDeclHandler)(
3504 self->parser,
3505 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3506 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3507 );
3508 } else {
3509 PyErr_Format(
3510 PyExc_ValueError,
3511 "unknown event '%s'", event
3512 );
3513 return NULL;
3514 }
3515 }
3516
3517 Py_RETURN_NONE;
3518
3519 error:
3520 PyErr_SetString(
3521 PyExc_TypeError,
3522 "invalid event tuple"
3523 );
3524 return NULL;
3525}
3526
3527static PyMethodDef xmlparser_methods[] = {
3528 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
3529 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3530 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3531 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003532 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003533 {NULL, NULL}
3534};
3535
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003536static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003537xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003538{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003539 if (PyUnicode_Check(nameobj)) {
3540 PyObject* res;
3541 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3542 res = self->entity;
3543 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3544 res = self->target;
3545 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3546 return PyUnicode_FromFormat(
3547 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003548 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003549 }
3550 else
3551 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003552
Alexander Belopolskye239d232010-12-08 23:31:48 +00003553 Py_INCREF(res);
3554 return res;
3555 }
3556 generic:
3557 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003558}
3559
Neal Norwitz227b5332006-03-22 09:28:35 +00003560static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003561 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003562 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003563 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003564 (destructor)xmlparser_dealloc, /* tp_dealloc */
3565 0, /* tp_print */
3566 0, /* tp_getattr */
3567 0, /* tp_setattr */
3568 0, /* tp_reserved */
3569 0, /* tp_repr */
3570 0, /* tp_as_number */
3571 0, /* tp_as_sequence */
3572 0, /* tp_as_mapping */
3573 0, /* tp_hash */
3574 0, /* tp_call */
3575 0, /* tp_str */
3576 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3577 0, /* tp_setattro */
3578 0, /* tp_as_buffer */
3579 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3580 /* tp_flags */
3581 0, /* tp_doc */
3582 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3583 (inquiry)xmlparser_gc_clear, /* tp_clear */
3584 0, /* tp_richcompare */
3585 0, /* tp_weaklistoffset */
3586 0, /* tp_iter */
3587 0, /* tp_iternext */
3588 xmlparser_methods, /* tp_methods */
3589 0, /* tp_members */
3590 0, /* tp_getset */
3591 0, /* tp_base */
3592 0, /* tp_dict */
3593 0, /* tp_descr_get */
3594 0, /* tp_descr_set */
3595 0, /* tp_dictoffset */
3596 (initproc)xmlparser_init, /* tp_init */
3597 PyType_GenericAlloc, /* tp_alloc */
3598 xmlparser_new, /* tp_new */
3599 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003600};
3601
/* ==================================================================== */
/* python module interface */
3604
3605static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003606 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003607 {NULL, NULL}
3608};
3609
Martin v. Löwis1a214512008-06-11 05:26:20 +00003610
3611static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003612 PyModuleDef_HEAD_INIT,
3613 "_elementtree",
3614 NULL,
3615 -1,
3616 _functions,
3617 NULL,
3618 NULL,
3619 NULL,
3620 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003621};
3622
Neal Norwitzf6657e62006-12-28 04:47:50 +00003623PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003624PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003625{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003626 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003627
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003628 /* Initialize object types */
3629 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003630 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003631 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003632 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003633 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003634 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003635
Martin v. Löwis1a214512008-06-11 05:26:20 +00003636 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003637 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003638 return NULL;
3639
Eli Bendersky828efde2012-04-05 05:40:58 +03003640 if (!(temp = PyImport_ImportModule("copy")))
3641 return NULL;
3642 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3643 Py_XDECREF(temp);
3644
3645 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3646 return NULL;
3647
Eli Bendersky20d41742012-06-01 09:48:37 +03003648 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003649 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3650 if (expat_capi) {
3651 /* check that it's usable */
3652 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3653 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3654 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3655 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003656 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003657 PyErr_SetString(PyExc_ImportError,
3658 "pyexpat version is incompatible");
3659 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003660 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003661 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003662 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003663 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003664
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003665 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003666 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003667 );
3668 Py_INCREF(elementtree_parseerror_obj);
3669 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3670
Eli Bendersky092af1f2012-03-04 07:14:03 +02003671 Py_INCREF((PyObject *)&Element_Type);
3672 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3673
Eli Bendersky58d548d2012-05-29 15:45:16 +03003674 Py_INCREF((PyObject *)&TreeBuilder_Type);
3675 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3676
Eli Bendersky52467b12012-06-01 07:13:08 +03003677 Py_INCREF((PyObject *)&XMLParser_Type);
3678 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003679
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003680 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681}