blob: d984b51bed15aa966dacf75daab7d37f97226b85 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000053#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054
55/* -------------------------------------------------------------------- */
56/* configuration */
57
58/* Leave defined to include the expat-based XMLParser type */
59#define USE_EXPAT
60
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* An element can hold this many children without extra memory
62 allocations. */
63#define STATIC_CHILDREN 4
64
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
69
70/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010071 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000072 that the number of children should be an even number, at least on
73 32-bit platforms. */
74
75/* -------------------------------------------------------------------- */
76
/* Optional allocation tracing.  Change the condition to 1 to keep a running
   byte count in `memory` and print it on every ALLOC/RELEASE; in the default
   configuration both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
87
/* compiler tweaks: LOCAL(type) declares a file-local function returning
   `type`; under MSVC it additionally requests inlining and __fastcall. */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
94
/* Macros that keep a one-bit 'join' flag in the lowest bit of a string
   object pointer.  Every use of text and tail as object pointers must go
   through JOIN_OBJ; see the comments in the ElementObject definition for
   more info.

   JOIN_OBJ(p)        the pointer with the flag bit masked off
   JOIN_GET(p)        the flag bit (0 or 1)
   JOIN_SET(p, flag)  the untagged pointer of p with `flag` or-ed in */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
102
103/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000104static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000105static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000106static PyObject* elementtree_iter_obj;
107static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000108static PyObject* elementpath_obj;
109
110/* helpers */
111
112LOCAL(PyObject*)
113deepcopy(PyObject* object, PyObject* memo)
114{
115 /* do a deep copy of the given object */
116
117 PyObject* args;
118 PyObject* result;
119
120 if (!elementtree_deepcopy_obj) {
121 PyErr_SetString(
122 PyExc_RuntimeError,
123 "deepcopy helper not found"
124 );
125 return NULL;
126 }
127
128 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000129 if (!args)
130 return NULL;
131
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
133 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
134
135 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
136
137 Py_DECREF(args);
138
139 return result;
140}
141
142LOCAL(PyObject*)
143list_join(PyObject* list)
144{
145 /* join list elements (destroying the list in the process) */
146
147 PyObject* joiner;
148 PyObject* function;
149 PyObject* args;
150 PyObject* result;
151
152 switch (PyList_GET_SIZE(list)) {
153 case 0:
154 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000155 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000156 case 1:
157 result = PyList_GET_ITEM(list, 0);
158 Py_INCREF(result);
159 Py_DECREF(list);
160 return result;
161 }
162
163 /* two or more elements: slice out a suitable separator from the
164 first member, and use that to join the entire list */
165
166 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
167 if (!joiner)
168 return NULL;
169
170 function = PyObject_GetAttrString(joiner, "join");
171 if (!function) {
172 Py_DECREF(joiner);
173 return NULL;
174 }
175
176 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000177 if (!args)
178 return NULL;
179
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000180 PyTuple_SET_ITEM(args, 0, list);
181
182 result = PyObject_CallObject(function, args);
183
184 Py_DECREF(args); /* also removes list */
185 Py_DECREF(function);
186 Py_DECREF(joiner);
187
188 return result;
189}
190
Eli Bendersky48d358b2012-05-30 17:57:50 +0300191/* Is the given object an empty dictionary?
192*/
193static int
194is_empty_dict(PyObject *obj)
195{
196 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
197}
198
199
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200201/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202
203typedef struct {
204
205 /* attributes (a dictionary object), or None if no attributes */
206 PyObject* attrib;
207
208 /* child elements */
209 int length; /* actual number of items */
210 int allocated; /* allocated items */
211
212 /* this either points to _children or to a malloced buffer */
213 PyObject* *children;
214
215 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100216
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000217} ElementObjectExtra;
218
219typedef struct {
220 PyObject_HEAD
221
222 /* element tag (a string). */
223 PyObject* tag;
224
225 /* text before first child. note that this is a tagged pointer;
226 use JOIN_OBJ to get the object pointer. the join flag is used
227 to distinguish lists created by the tree builder from lists
228 assigned to the attribute by application code; the former
229 should be joined before being returned to the user, the latter
230 should be left intact. */
231 PyObject* text;
232
233 /* text after this element, in parent. note that this is a tagged
234 pointer; use JOIN_OBJ to get the object pointer. */
235 PyObject* tail;
236
237 ElementObjectExtra* extra;
238
Eli Benderskyebf37a22012-04-03 22:02:37 +0300239 PyObject *weakreflist; /* For tp_weaklistoffset */
240
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000241} ElementObject;
242
Neal Norwitz227b5332006-03-22 09:28:35 +0000243static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000244
Christian Heimes90aa7642007-12-19 02:45:37 +0000245#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246
247/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200248/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249
250LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200251create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000252{
253 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
254 if (!self->extra)
255 return -1;
256
257 if (!attrib)
258 attrib = Py_None;
259
260 Py_INCREF(attrib);
261 self->extra->attrib = attrib;
262
263 self->extra->length = 0;
264 self->extra->allocated = STATIC_CHILDREN;
265 self->extra->children = self->extra->_children;
266
267 return 0;
268}
269
270LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200271dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000272{
Eli Bendersky08b85292012-04-04 15:55:07 +0300273 ElementObjectExtra *myextra;
274 int i;
275
Eli Benderskyebf37a22012-04-03 22:02:37 +0300276 if (!self->extra)
277 return;
278
279 /* Avoid DECREFs calling into this code again (cycles, etc.)
280 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300281 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300282 self->extra = NULL;
283
284 Py_DECREF(myextra->attrib);
285
Eli Benderskyebf37a22012-04-03 22:02:37 +0300286 for (i = 0; i < myextra->length; i++)
287 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288
Eli Benderskyebf37a22012-04-03 22:02:37 +0300289 if (myextra->children != myextra->_children)
290 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000291
Eli Benderskyebf37a22012-04-03 22:02:37 +0300292 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000293}
294
Eli Bendersky092af1f2012-03-04 07:14:03 +0200295/* Convenience internal function to create new Element objects with the given
296 * tag and attributes.
297*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000298LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200299create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000300{
301 ElementObject* self;
302
Eli Bendersky0192ba32012-03-30 16:38:33 +0300303 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000304 if (self == NULL)
305 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000306 self->extra = NULL;
307
Eli Bendersky48d358b2012-05-30 17:57:50 +0300308 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200309 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000310 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000312 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000313 }
314
315 Py_INCREF(tag);
316 self->tag = tag;
317
318 Py_INCREF(Py_None);
319 self->text = Py_None;
320
321 Py_INCREF(Py_None);
322 self->tail = Py_None;
323
Eli Benderskyebf37a22012-04-03 22:02:37 +0300324 self->weakreflist = NULL;
325
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000326 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300327 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000328 return (PyObject*) self;
329}
330
Eli Bendersky092af1f2012-03-04 07:14:03 +0200331static PyObject *
332element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
333{
334 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
335 if (e != NULL) {
336 Py_INCREF(Py_None);
337 e->tag = Py_None;
338
339 Py_INCREF(Py_None);
340 e->text = Py_None;
341
342 Py_INCREF(Py_None);
343 e->tail = Py_None;
344
345 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300346 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200347 }
348 return (PyObject *)e;
349}
350
Eli Bendersky737b1732012-05-29 06:02:56 +0300351/* Helper function for extracting the attrib dictionary from a keywords dict.
352 * This is required by some constructors/functions in this module that can
353 * either accept attrib as a keyword argument or all attributes splashed
354 * directly into *kwds.
355 * If there is no 'attrib' keyword, return an empty dict.
356 */
357static PyObject*
358get_attrib_from_keywords(PyObject *kwds)
359{
360 PyObject *attrib_str = PyUnicode_FromString("attrib");
361 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
362
363 if (attrib) {
364 /* If attrib was found in kwds, copy its value and remove it from
365 * kwds
366 */
367 if (!PyDict_Check(attrib)) {
368 Py_DECREF(attrib_str);
369 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
370 Py_TYPE(attrib)->tp_name);
371 return NULL;
372 }
373 attrib = PyDict_Copy(attrib);
374 PyDict_DelItem(kwds, attrib_str);
375 } else {
376 attrib = PyDict_New();
377 }
378
379 Py_DECREF(attrib_str);
380
381 if (attrib)
382 PyDict_Update(attrib, kwds);
383 return attrib;
384}
385
Eli Bendersky092af1f2012-03-04 07:14:03 +0200386static int
387element_init(PyObject *self, PyObject *args, PyObject *kwds)
388{
389 PyObject *tag;
390 PyObject *tmp;
391 PyObject *attrib = NULL;
392 ElementObject *self_elem;
393
394 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
395 return -1;
396
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (attrib) {
398 /* attrib passed as positional arg */
399 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 if (!attrib)
401 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300402 if (kwds) {
403 if (PyDict_Update(attrib, kwds) < 0) {
404 return -1;
405 }
406 }
407 } else if (kwds) {
408 /* have keywords args */
409 attrib = get_attrib_from_keywords(kwds);
410 if (!attrib)
411 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200412 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300413 /* no attrib arg, no kwds, so no attributes */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200414 Py_INCREF(Py_None);
415 attrib = Py_None;
416 }
417
418 self_elem = (ElementObject *)self;
419
Eli Bendersky48d358b2012-05-30 17:57:50 +0300420 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 if (create_extra(self_elem, attrib) < 0) {
422 PyObject_Del(self_elem);
423 return -1;
424 }
425 }
426
Eli Bendersky48d358b2012-05-30 17:57:50 +0300427 /* We own a reference to attrib here and it's no longer needed. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200428 Py_DECREF(attrib);
429
430 /* Replace the objects already pointed to by tag, text and tail. */
431 tmp = self_elem->tag;
432 self_elem->tag = tag;
433 Py_INCREF(tag);
434 Py_DECREF(tmp);
435
436 tmp = self_elem->text;
437 self_elem->text = Py_None;
438 Py_INCREF(Py_None);
439 Py_DECREF(JOIN_OBJ(tmp));
440
441 tmp = self_elem->tail;
442 self_elem->tail = Py_None;
443 Py_INCREF(Py_None);
444 Py_DECREF(JOIN_OBJ(tmp));
445
446 return 0;
447}
448
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000449LOCAL(int)
450element_resize(ElementObject* self, int extra)
451{
452 int size;
453 PyObject* *children;
454
455 /* make sure self->children can hold the given number of extra
456 elements. set an exception and return -1 if allocation failed */
457
458 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200459 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000460
461 size = self->extra->length + extra;
462
463 if (size > self->extra->allocated) {
464 /* use Python 2.4's list growth strategy */
465 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000466 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100467 * which needs at least 4 bytes.
468 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 * be safe.
470 */
471 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000472 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000473 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100474 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000475 * false alarm always assume at least one child to be safe.
476 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000477 children = PyObject_Realloc(self->extra->children,
478 size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 } else {
482 children = PyObject_Malloc(size * sizeof(PyObject*));
483 if (!children)
484 goto nomemory;
485 /* copy existing children from static area to malloc buffer */
486 memcpy(children, self->extra->children,
487 self->extra->length * sizeof(PyObject*));
488 }
489 self->extra->children = children;
490 self->extra->allocated = size;
491 }
492
493 return 0;
494
495 nomemory:
496 PyErr_NoMemory();
497 return -1;
498}
499
500LOCAL(int)
501element_add_subelement(ElementObject* self, PyObject* element)
502{
503 /* add a child element to a parent */
504
505 if (element_resize(self, 1) < 0)
506 return -1;
507
508 Py_INCREF(element);
509 self->extra->children[self->extra->length] = element;
510
511 self->extra->length++;
512
513 return 0;
514}
515
516LOCAL(PyObject*)
517element_get_attrib(ElementObject* self)
518{
519 /* return borrowed reference to attrib dictionary */
520 /* note: this function assumes that the extra section exists */
521
522 PyObject* res = self->extra->attrib;
523
524 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000525 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000526 /* create missing dictionary */
527 res = PyDict_New();
528 if (!res)
529 return NULL;
530 self->extra->attrib = res;
531 }
532
533 return res;
534}
535
536LOCAL(PyObject*)
537element_get_text(ElementObject* self)
538{
539 /* return borrowed reference to text attribute */
540
541 PyObject* res = self->text;
542
543 if (JOIN_GET(res)) {
544 res = JOIN_OBJ(res);
545 if (PyList_CheckExact(res)) {
546 res = list_join(res);
547 if (!res)
548 return NULL;
549 self->text = res;
550 }
551 }
552
553 return res;
554}
555
556LOCAL(PyObject*)
557element_get_tail(ElementObject* self)
558{
559 /* return borrowed reference to text attribute */
560
561 PyObject* res = self->tail;
562
563 if (JOIN_GET(res)) {
564 res = JOIN_OBJ(res);
565 if (PyList_CheckExact(res)) {
566 res = list_join(res);
567 if (!res)
568 return NULL;
569 self->tail = res;
570 }
571 }
572
573 return res;
574}
575
576static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300577subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000578{
579 PyObject* elem;
580
581 ElementObject* parent;
582 PyObject* tag;
583 PyObject* attrib = NULL;
584 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
585 &Element_Type, &parent, &tag,
586 &PyDict_Type, &attrib))
587 return NULL;
588
Eli Bendersky737b1732012-05-29 06:02:56 +0300589 if (attrib) {
590 /* attrib passed as positional arg */
591 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000592 if (!attrib)
593 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300594 if (kwds) {
595 if (PyDict_Update(attrib, kwds) < 0) {
596 return NULL;
597 }
598 }
599 } else if (kwds) {
600 /* have keyword args */
601 attrib = get_attrib_from_keywords(kwds);
602 if (!attrib)
603 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000604 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300605 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000606 Py_INCREF(Py_None);
607 attrib = Py_None;
608 }
609
Eli Bendersky092af1f2012-03-04 07:14:03 +0200610 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000611
612 Py_DECREF(attrib);
613
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000614 if (element_add_subelement(parent, elem) < 0) {
615 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000616 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000617 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000618
619 return elem;
620}
621
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622static int
623element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
624{
625 Py_VISIT(self->tag);
626 Py_VISIT(JOIN_OBJ(self->text));
627 Py_VISIT(JOIN_OBJ(self->tail));
628
629 if (self->extra) {
630 int i;
631 Py_VISIT(self->extra->attrib);
632
633 for (i = 0; i < self->extra->length; ++i)
634 Py_VISIT(self->extra->children[i]);
635 }
636 return 0;
637}
638
639static int
640element_gc_clear(ElementObject *self)
641{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300642 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300643
644 /* The following is like Py_CLEAR for self->text and self->tail, but
645 * written explicitily because the real pointers hide behind access
646 * macros.
647 */
648 if (self->text) {
649 PyObject *tmp = JOIN_OBJ(self->text);
650 self->text = NULL;
651 Py_DECREF(tmp);
652 }
653
654 if (self->tail) {
655 PyObject *tmp = JOIN_OBJ(self->tail);
656 self->tail = NULL;
657 Py_DECREF(tmp);
658 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659
660 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300661 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300662 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300663 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300664 return 0;
665}
666
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000667static void
668element_dealloc(ElementObject* self)
669{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300670 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300671
672 if (self->weakreflist != NULL)
673 PyObject_ClearWeakRefs((PyObject *) self);
674
Eli Bendersky0192ba32012-03-30 16:38:33 +0300675 /* element_gc_clear clears all references and deallocates extra
676 */
677 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000678
679 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200680 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000681}
682
683/* -------------------------------------------------------------------- */
684/* methods (in alphabetical order) */
685
686static PyObject*
687element_append(ElementObject* self, PyObject* args)
688{
689 PyObject* element;
690 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
691 return NULL;
692
693 if (element_add_subelement(self, element) < 0)
694 return NULL;
695
696 Py_RETURN_NONE;
697}
698
699static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300700element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701{
702 if (!PyArg_ParseTuple(args, ":clear"))
703 return NULL;
704
Eli Benderskyebf37a22012-04-03 22:02:37 +0300705 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000706
707 Py_INCREF(Py_None);
708 Py_DECREF(JOIN_OBJ(self->text));
709 self->text = Py_None;
710
711 Py_INCREF(Py_None);
712 Py_DECREF(JOIN_OBJ(self->tail));
713 self->tail = Py_None;
714
715 Py_RETURN_NONE;
716}
717
718static PyObject*
719element_copy(ElementObject* self, PyObject* args)
720{
721 int i;
722 ElementObject* element;
723
724 if (!PyArg_ParseTuple(args, ":__copy__"))
725 return NULL;
726
Eli Bendersky092af1f2012-03-04 07:14:03 +0200727 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000728 self->tag, (self->extra) ? self->extra->attrib : Py_None
729 );
730 if (!element)
731 return NULL;
732
733 Py_DECREF(JOIN_OBJ(element->text));
734 element->text = self->text;
735 Py_INCREF(JOIN_OBJ(element->text));
736
737 Py_DECREF(JOIN_OBJ(element->tail));
738 element->tail = self->tail;
739 Py_INCREF(JOIN_OBJ(element->tail));
740
741 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100742
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000743 if (element_resize(element, self->extra->length) < 0) {
744 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000746 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000747
748 for (i = 0; i < self->extra->length; i++) {
749 Py_INCREF(self->extra->children[i]);
750 element->extra->children[i] = self->extra->children[i];
751 }
752
753 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100754
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755 }
756
757 return (PyObject*) element;
758}
759
760static PyObject*
761element_deepcopy(ElementObject* self, PyObject* args)
762{
763 int i;
764 ElementObject* element;
765 PyObject* tag;
766 PyObject* attrib;
767 PyObject* text;
768 PyObject* tail;
769 PyObject* id;
770
771 PyObject* memo;
772 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
773 return NULL;
774
775 tag = deepcopy(self->tag, memo);
776 if (!tag)
777 return NULL;
778
779 if (self->extra) {
780 attrib = deepcopy(self->extra->attrib, memo);
781 if (!attrib) {
782 Py_DECREF(tag);
783 return NULL;
784 }
785 } else {
786 Py_INCREF(Py_None);
787 attrib = Py_None;
788 }
789
Eli Bendersky092af1f2012-03-04 07:14:03 +0200790 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000791
792 Py_DECREF(tag);
793 Py_DECREF(attrib);
794
795 if (!element)
796 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100797
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000798 text = deepcopy(JOIN_OBJ(self->text), memo);
799 if (!text)
800 goto error;
801 Py_DECREF(element->text);
802 element->text = JOIN_SET(text, JOIN_GET(self->text));
803
804 tail = deepcopy(JOIN_OBJ(self->tail), memo);
805 if (!tail)
806 goto error;
807 Py_DECREF(element->tail);
808 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
809
810 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100811
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000812 if (element_resize(element, self->extra->length) < 0)
813 goto error;
814
815 for (i = 0; i < self->extra->length; i++) {
816 PyObject* child = deepcopy(self->extra->children[i], memo);
817 if (!child) {
818 element->extra->length = i;
819 goto error;
820 }
821 element->extra->children[i] = child;
822 }
823
824 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100825
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826 }
827
828 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000829 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000830 if (!id)
831 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000832
833 i = PyDict_SetItem(memo, id, (PyObject*) element);
834
835 Py_DECREF(id);
836
837 if (i < 0)
838 goto error;
839
840 return (PyObject*) element;
841
842 error:
843 Py_DECREF(element);
844 return NULL;
845}
846
847LOCAL(int)
848checkpath(PyObject* tag)
849{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000850 Py_ssize_t i;
851 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000852
853 /* check if a tag contains an xpath character */
854
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000855#define PATHCHAR(ch) \
856 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000857
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000858 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200859 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
860 void *data = PyUnicode_DATA(tag);
861 unsigned int kind = PyUnicode_KIND(tag);
862 for (i = 0; i < len; i++) {
863 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
864 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000865 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200866 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000867 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200868 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000869 return 1;
870 }
871 return 0;
872 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000873 if (PyBytes_Check(tag)) {
874 char *p = PyBytes_AS_STRING(tag);
875 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000876 if (p[i] == '{')
877 check = 0;
878 else if (p[i] == '}')
879 check = 1;
880 else if (check && PATHCHAR(p[i]))
881 return 1;
882 }
883 return 0;
884 }
885
886 return 1; /* unknown type; might be path expression */
887}
888
889static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000890element_extend(ElementObject* self, PyObject* args)
891{
892 PyObject* seq;
893 Py_ssize_t i, seqlen = 0;
894
895 PyObject* seq_in;
896 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
897 return NULL;
898
899 seq = PySequence_Fast(seq_in, "");
900 if (!seq) {
901 PyErr_Format(
902 PyExc_TypeError,
903 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
904 );
905 return NULL;
906 }
907
908 seqlen = PySequence_Size(seq);
909 for (i = 0; i < seqlen; i++) {
910 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +0200911 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
912 Py_DECREF(seq);
913 PyErr_Format(
914 PyExc_TypeError,
915 "expected an Element, not \"%.200s\"",
916 Py_TYPE(element)->tp_name);
917 return NULL;
918 }
919
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000920 if (element_add_subelement(self, element) < 0) {
921 Py_DECREF(seq);
922 return NULL;
923 }
924 }
925
926 Py_DECREF(seq);
927
928 Py_RETURN_NONE;
929}
930
931static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300932element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000933{
934 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000935 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000936 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +0300937 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200938
Eli Bendersky737b1732012-05-29 06:02:56 +0300939 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
940 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000941 return NULL;
942
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200943 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200944 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200945 return _PyObject_CallMethodId(
946 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000947 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200948 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000949
950 if (!self->extra)
951 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100952
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000953 for (i = 0; i < self->extra->length; i++) {
954 PyObject* item = self->extra->children[i];
955 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000956 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000957 Py_INCREF(item);
958 return item;
959 }
960 }
961
962 Py_RETURN_NONE;
963}
964
965static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300966element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000967{
968 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000969 PyObject* tag;
970 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000971 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200972 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +0300973 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200974
Eli Bendersky737b1732012-05-29 06:02:56 +0300975 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
976 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000977 return NULL;
978
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000979 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200980 return _PyObject_CallMethodId(
981 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000982 );
983
984 if (!self->extra) {
985 Py_INCREF(default_value);
986 return default_value;
987 }
988
989 for (i = 0; i < self->extra->length; i++) {
990 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000991 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
992
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000993 PyObject* text = element_get_text(item);
994 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000995 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000996 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000997 return text;
998 }
999 }
1000
1001 Py_INCREF(default_value);
1002 return default_value;
1003}
1004
1005static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001006element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001007{
1008 int i;
1009 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001010 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001011 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001012 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001013
Eli Bendersky737b1732012-05-29 06:02:56 +03001014 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1015 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001016 return NULL;
1017
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001018 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001019 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001020 return _PyObject_CallMethodId(
1021 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001022 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001023 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001024
1025 out = PyList_New(0);
1026 if (!out)
1027 return NULL;
1028
1029 if (!self->extra)
1030 return out;
1031
1032 for (i = 0; i < self->extra->length; i++) {
1033 PyObject* item = self->extra->children[i];
1034 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001035 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001036 if (PyList_Append(out, item) < 0) {
1037 Py_DECREF(out);
1038 return NULL;
1039 }
1040 }
1041 }
1042
1043 return out;
1044}
1045
1046static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001047element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001048{
1049 PyObject* tag;
1050 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001051 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001052 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001053
Eli Bendersky737b1732012-05-29 06:02:56 +03001054 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1055 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001056 return NULL;
1057
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001058 return _PyObject_CallMethodId(
1059 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001060 );
1061}
1062
1063static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001064element_get(ElementObject* self, PyObject* args)
1065{
1066 PyObject* value;
1067
1068 PyObject* key;
1069 PyObject* default_value = Py_None;
1070 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
1071 return NULL;
1072
1073 if (!self->extra || self->extra->attrib == Py_None)
1074 value = default_value;
1075 else {
1076 value = PyDict_GetItem(self->extra->attrib, key);
1077 if (!value)
1078 value = default_value;
1079 }
1080
1081 Py_INCREF(value);
1082 return value;
1083}
1084
1085static PyObject*
1086element_getchildren(ElementObject* self, PyObject* args)
1087{
1088 int i;
1089 PyObject* list;
1090
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001091 /* FIXME: report as deprecated? */
1092
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001093 if (!PyArg_ParseTuple(args, ":getchildren"))
1094 return NULL;
1095
1096 if (!self->extra)
1097 return PyList_New(0);
1098
1099 list = PyList_New(self->extra->length);
1100 if (!list)
1101 return NULL;
1102
1103 for (i = 0; i < self->extra->length; i++) {
1104 PyObject* item = self->extra->children[i];
1105 Py_INCREF(item);
1106 PyList_SET_ITEM(list, i, item);
1107 }
1108
1109 return list;
1110}
1111
1112static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001113element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001114{
1115 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001116
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001117 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001118 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001119 return NULL;
1120
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001121 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001122 PyErr_SetString(
1123 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001125 );
1126 return NULL;
1127 }
1128
1129 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001130 if (!args)
1131 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001132
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001133 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1134 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1135
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001136 result = PyObject_CallObject(elementtree_iter_obj, args);
1137
1138 Py_DECREF(args);
1139
1140 return result;
1141}
1142
1143
1144static PyObject*
1145element_itertext(ElementObject* self, PyObject* args)
1146{
1147 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001148
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001149 if (!PyArg_ParseTuple(args, ":itertext"))
1150 return NULL;
1151
1152 if (!elementtree_itertext_obj) {
1153 PyErr_SetString(
1154 PyExc_RuntimeError,
1155 "itertext helper not found"
1156 );
1157 return NULL;
1158 }
1159
1160 args = PyTuple_New(1);
1161 if (!args)
1162 return NULL;
1163
1164 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1165
1166 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001167
1168 Py_DECREF(args);
1169
1170 return result;
1171}
1172
1173static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001174element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001175{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001176 ElementObject* self = (ElementObject*) self_;
1177
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178 if (!self->extra || index < 0 || index >= self->extra->length) {
1179 PyErr_SetString(
1180 PyExc_IndexError,
1181 "child index out of range"
1182 );
1183 return NULL;
1184 }
1185
1186 Py_INCREF(self->extra->children[index]);
1187 return self->extra->children[index];
1188}
1189
1190static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001191element_insert(ElementObject* self, PyObject* args)
1192{
1193 int i;
1194
1195 int index;
1196 PyObject* element;
1197 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1198 &Element_Type, &element))
1199 return NULL;
1200
1201 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001202 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001203
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001204 if (index < 0) {
1205 index += self->extra->length;
1206 if (index < 0)
1207 index = 0;
1208 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001209 if (index > self->extra->length)
1210 index = self->extra->length;
1211
1212 if (element_resize(self, 1) < 0)
1213 return NULL;
1214
1215 for (i = self->extra->length; i > index; i--)
1216 self->extra->children[i] = self->extra->children[i-1];
1217
1218 Py_INCREF(element);
1219 self->extra->children[index] = element;
1220
1221 self->extra->length++;
1222
1223 Py_RETURN_NONE;
1224}
1225
1226static PyObject*
1227element_items(ElementObject* self, PyObject* args)
1228{
1229 if (!PyArg_ParseTuple(args, ":items"))
1230 return NULL;
1231
1232 if (!self->extra || self->extra->attrib == Py_None)
1233 return PyList_New(0);
1234
1235 return PyDict_Items(self->extra->attrib);
1236}
1237
1238static PyObject*
1239element_keys(ElementObject* self, PyObject* args)
1240{
1241 if (!PyArg_ParseTuple(args, ":keys"))
1242 return NULL;
1243
1244 if (!self->extra || self->extra->attrib == Py_None)
1245 return PyList_New(0);
1246
1247 return PyDict_Keys(self->extra->attrib);
1248}
1249
Martin v. Löwis18e16552006-02-15 17:27:45 +00001250static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001251element_length(ElementObject* self)
1252{
1253 if (!self->extra)
1254 return 0;
1255
1256 return self->extra->length;
1257}
1258
1259static PyObject*
1260element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1261{
1262 PyObject* elem;
1263
1264 PyObject* tag;
1265 PyObject* attrib;
1266 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1267 return NULL;
1268
1269 attrib = PyDict_Copy(attrib);
1270 if (!attrib)
1271 return NULL;
1272
Eli Bendersky092af1f2012-03-04 07:14:03 +02001273 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001274
1275 Py_DECREF(attrib);
1276
1277 return elem;
1278}
1279
1280static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001281element_remove(ElementObject* self, PyObject* args)
1282{
1283 int i;
1284
1285 PyObject* element;
1286 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1287 return NULL;
1288
1289 if (!self->extra) {
1290 /* element has no children, so raise exception */
1291 PyErr_SetString(
1292 PyExc_ValueError,
1293 "list.remove(x): x not in list"
1294 );
1295 return NULL;
1296 }
1297
1298 for (i = 0; i < self->extra->length; i++) {
1299 if (self->extra->children[i] == element)
1300 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001301 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001302 break;
1303 }
1304
1305 if (i == self->extra->length) {
1306 /* element is not in children, so raise exception */
1307 PyErr_SetString(
1308 PyExc_ValueError,
1309 "list.remove(x): x not in list"
1310 );
1311 return NULL;
1312 }
1313
1314 Py_DECREF(self->extra->children[i]);
1315
1316 self->extra->length--;
1317
1318 for (; i < self->extra->length; i++)
1319 self->extra->children[i] = self->extra->children[i+1];
1320
1321 Py_RETURN_NONE;
1322}
1323
1324static PyObject*
1325element_repr(ElementObject* self)
1326{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001327 if (self->tag)
1328 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1329 else
1330 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001331}
1332
1333static PyObject*
1334element_set(ElementObject* self, PyObject* args)
1335{
1336 PyObject* attrib;
1337
1338 PyObject* key;
1339 PyObject* value;
1340 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1341 return NULL;
1342
1343 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001344 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001345
1346 attrib = element_get_attrib(self);
1347 if (!attrib)
1348 return NULL;
1349
1350 if (PyDict_SetItem(attrib, key, value) < 0)
1351 return NULL;
1352
1353 Py_RETURN_NONE;
1354}
1355
1356static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001357element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001358{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001359 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001360 int i;
1361 PyObject* old;
1362
1363 if (!self->extra || index < 0 || index >= self->extra->length) {
1364 PyErr_SetString(
1365 PyExc_IndexError,
1366 "child assignment index out of range");
1367 return -1;
1368 }
1369
1370 old = self->extra->children[index];
1371
1372 if (item) {
1373 Py_INCREF(item);
1374 self->extra->children[index] = item;
1375 } else {
1376 self->extra->length--;
1377 for (i = index; i < self->extra->length; i++)
1378 self->extra->children[i] = self->extra->children[i+1];
1379 }
1380
1381 Py_DECREF(old);
1382
1383 return 0;
1384}
1385
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001386static PyObject*
1387element_subscr(PyObject* self_, PyObject* item)
1388{
1389 ElementObject* self = (ElementObject*) self_;
1390
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001391 if (PyIndex_Check(item)) {
1392 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001393
1394 if (i == -1 && PyErr_Occurred()) {
1395 return NULL;
1396 }
1397 if (i < 0 && self->extra)
1398 i += self->extra->length;
1399 return element_getitem(self_, i);
1400 }
1401 else if (PySlice_Check(item)) {
1402 Py_ssize_t start, stop, step, slicelen, cur, i;
1403 PyObject* list;
1404
1405 if (!self->extra)
1406 return PyList_New(0);
1407
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001408 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001409 self->extra->length,
1410 &start, &stop, &step, &slicelen) < 0) {
1411 return NULL;
1412 }
1413
1414 if (slicelen <= 0)
1415 return PyList_New(0);
1416 else {
1417 list = PyList_New(slicelen);
1418 if (!list)
1419 return NULL;
1420
1421 for (cur = start, i = 0; i < slicelen;
1422 cur += step, i++) {
1423 PyObject* item = self->extra->children[cur];
1424 Py_INCREF(item);
1425 PyList_SET_ITEM(list, i, item);
1426 }
1427
1428 return list;
1429 }
1430 }
1431 else {
1432 PyErr_SetString(PyExc_TypeError,
1433 "element indices must be integers");
1434 return NULL;
1435 }
1436}
1437
1438static int
1439element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1440{
1441 ElementObject* self = (ElementObject*) self_;
1442
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001443 if (PyIndex_Check(item)) {
1444 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001445
1446 if (i == -1 && PyErr_Occurred()) {
1447 return -1;
1448 }
1449 if (i < 0 && self->extra)
1450 i += self->extra->length;
1451 return element_setitem(self_, i, value);
1452 }
1453 else if (PySlice_Check(item)) {
1454 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1455
1456 PyObject* recycle = NULL;
1457 PyObject* seq = NULL;
1458
1459 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001460 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001461
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001462 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001463 self->extra->length,
1464 &start, &stop, &step, &slicelen) < 0) {
1465 return -1;
1466 }
1467
Eli Bendersky865756a2012-03-09 13:38:15 +02001468 if (value == NULL) {
1469 /* Delete slice */
1470 size_t cur;
1471 Py_ssize_t i;
1472
1473 if (slicelen <= 0)
1474 return 0;
1475
1476 /* Since we're deleting, the direction of the range doesn't matter,
1477 * so for simplicity make it always ascending.
1478 */
1479 if (step < 0) {
1480 stop = start + 1;
1481 start = stop + step * (slicelen - 1) - 1;
1482 step = -step;
1483 }
1484
1485 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1486
1487 /* recycle is a list that will contain all the children
1488 * scheduled for removal.
1489 */
1490 if (!(recycle = PyList_New(slicelen))) {
1491 PyErr_NoMemory();
1492 return -1;
1493 }
1494
1495 /* This loop walks over all the children that have to be deleted,
1496 * with cur pointing at them. num_moved is the amount of children
1497 * until the next deleted child that have to be "shifted down" to
1498 * occupy the deleted's places.
1499 * Note that in the ith iteration, shifting is done i+i places down
1500 * because i children were already removed.
1501 */
1502 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1503 /* Compute how many children have to be moved, clipping at the
1504 * list end.
1505 */
1506 Py_ssize_t num_moved = step - 1;
1507 if (cur + step >= (size_t)self->extra->length) {
1508 num_moved = self->extra->length - cur - 1;
1509 }
1510
1511 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1512
1513 memmove(
1514 self->extra->children + cur - i,
1515 self->extra->children + cur + 1,
1516 num_moved * sizeof(PyObject *));
1517 }
1518
1519 /* Leftover "tail" after the last removed child */
1520 cur = start + (size_t)slicelen * step;
1521 if (cur < (size_t)self->extra->length) {
1522 memmove(
1523 self->extra->children + cur - slicelen,
1524 self->extra->children + cur,
1525 (self->extra->length - cur) * sizeof(PyObject *));
1526 }
1527
1528 self->extra->length -= slicelen;
1529
1530 /* Discard the recycle list with all the deleted sub-elements */
1531 Py_XDECREF(recycle);
1532 return 0;
1533 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001534 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001535 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001536 seq = PySequence_Fast(value, "");
1537 if (!seq) {
1538 PyErr_Format(
1539 PyExc_TypeError,
1540 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1541 );
1542 return -1;
1543 }
1544 newlen = PySequence_Size(seq);
1545 }
1546
1547 if (step != 1 && newlen != slicelen)
1548 {
1549 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001550 "attempt to assign sequence of size %zd "
1551 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001552 newlen, slicelen
1553 );
1554 return -1;
1555 }
1556
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001557 /* Resize before creating the recycle bin, to prevent refleaks. */
1558 if (newlen > slicelen) {
1559 if (element_resize(self, newlen - slicelen) < 0) {
1560 if (seq) {
1561 Py_DECREF(seq);
1562 }
1563 return -1;
1564 }
1565 }
1566
1567 if (slicelen > 0) {
1568 /* to avoid recursive calls to this method (via decref), move
1569 old items to the recycle bin here, and get rid of them when
1570 we're done modifying the element */
1571 recycle = PyList_New(slicelen);
1572 if (!recycle) {
1573 if (seq) {
1574 Py_DECREF(seq);
1575 }
1576 return -1;
1577 }
1578 for (cur = start, i = 0; i < slicelen;
1579 cur += step, i++)
1580 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1581 }
1582
1583 if (newlen < slicelen) {
1584 /* delete slice */
1585 for (i = stop; i < self->extra->length; i++)
1586 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1587 } else if (newlen > slicelen) {
1588 /* insert slice */
1589 for (i = self->extra->length-1; i >= stop; i--)
1590 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1591 }
1592
1593 /* replace the slice */
1594 for (cur = start, i = 0; i < newlen;
1595 cur += step, i++) {
1596 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1597 Py_INCREF(element);
1598 self->extra->children[cur] = element;
1599 }
1600
1601 self->extra->length += newlen - slicelen;
1602
1603 if (seq) {
1604 Py_DECREF(seq);
1605 }
1606
1607 /* discard the recycle bin, and everything in it */
1608 Py_XDECREF(recycle);
1609
1610 return 0;
1611 }
1612 else {
1613 PyErr_SetString(PyExc_TypeError,
1614 "element indices must be integers");
1615 return -1;
1616 }
1617}
1618
/* Method table for Element objects (installed as tp_methods of
 * Element_Type).  Each entry maps a Python-visible method name to its C
 * implementation and calling convention; the find* family and iterfind also
 * accept keyword arguments.
 */
static PyMethodDef element_methods[] = {

    {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},

    /* attribute access */
    {"get", (PyCFunction) element_get, METH_VARARGS},
    {"set", (PyCFunction) element_set, METH_VARARGS},

    /* child lookup by tag or path */
    {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
    {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
    {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},

    /* child list mutation */
    {"append", (PyCFunction) element_append, METH_VARARGS},
    {"extend", (PyCFunction) element_extend, METH_VARARGS},
    {"insert", (PyCFunction) element_insert, METH_VARARGS},
    {"remove", (PyCFunction) element_remove, METH_VARARGS},

    /* iteration helpers */
    {"iter", (PyCFunction) element_iter, METH_VARARGS},
    {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
    {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},

    /* "getiterator" is bound to the same C function as "iter" */
    {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
    {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},

    {"items", (PyCFunction) element_items, METH_VARARGS},
    {"keys", (PyCFunction) element_keys, METH_VARARGS},

    {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},

    {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
    {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},

    /* sentinel terminating the table */
    {NULL, NULL}
};
1652
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001653static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001654element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001655{
1656 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001657 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001658
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001659 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001660 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001661
Alexander Belopolskye239d232010-12-08 23:31:48 +00001662 if (name == NULL)
1663 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001664
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001665 /* handle common attributes first */
1666 if (strcmp(name, "tag") == 0) {
1667 res = self->tag;
1668 Py_INCREF(res);
1669 return res;
1670 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001671 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001672 Py_INCREF(res);
1673 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001674 }
1675
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001676 /* methods */
1677 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1678 if (res)
1679 return res;
1680
1681 /* less common attributes */
1682 if (strcmp(name, "tail") == 0) {
1683 PyErr_Clear();
1684 res = element_get_tail(self);
1685 } else if (strcmp(name, "attrib") == 0) {
1686 PyErr_Clear();
1687 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001688 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001689 res = element_get_attrib(self);
1690 }
1691
1692 if (!res)
1693 return NULL;
1694
1695 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001696 return res;
1697}
1698
Eli Benderskyb20df952012-05-20 06:33:29 +03001699static PyObject*
1700element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001701{
Eli Benderskyb20df952012-05-20 06:33:29 +03001702 char *name = "";
1703 if (PyUnicode_Check(nameobj))
1704 name = _PyUnicode_AsString(nameobj);
1705
1706 if (name == NULL)
1707 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001708
1709 if (strcmp(name, "tag") == 0) {
1710 Py_DECREF(self->tag);
1711 self->tag = value;
1712 Py_INCREF(self->tag);
1713 } else if (strcmp(name, "text") == 0) {
1714 Py_DECREF(JOIN_OBJ(self->text));
1715 self->text = value;
1716 Py_INCREF(self->text);
1717 } else if (strcmp(name, "tail") == 0) {
1718 Py_DECREF(JOIN_OBJ(self->tail));
1719 self->tail = value;
1720 Py_INCREF(self->tail);
1721 } else if (strcmp(name, "attrib") == 0) {
1722 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001723 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001724 Py_DECREF(self->extra->attrib);
1725 self->extra->attrib = value;
1726 Py_INCREF(self->extra->attrib);
1727 } else {
1728 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001729 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001730 }
1731
Eli Benderskyb20df952012-05-20 06:33:29 +03001732 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001733}
1734
/* Sequence protocol slots for Element: length, integer indexing and integer
 * item assignment/deletion.  Entries are positional; unused slots are 0.
 */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem, /* sq_item */
    0, /* was_sq_slice */
    element_setitem, /* sq_ass_item */
    0, /* was_sq_ass_slice */
};
1744
/* Mapping protocol slots for Element; these give access to index and slice
 * keys through element_subscr / element_ass_subscr.
 */
static PyMappingMethods element_as_mapping = {
    (lenfunc) element_length, /* mp_length */
    (binaryfunc) element_subscr, /* mp_subscript */
    (objobjargproc) element_ass_subscr, /* mp_ass_subscript */
};
1750
/* Type object for Element.  Slots are filled positionally; each line is
 * labelled with the slot it initialises.  The type can be subclassed
 * (Py_TPFLAGS_BASETYPE), takes part in cyclic GC (Py_TPFLAGS_HAVE_GC with
 * traverse/clear handlers) and supports weak references through the
 * weakreflist member of ElementObject.
 */
static PyTypeObject Element_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "Element", sizeof(ElementObject), 0,
    /* methods */
    (destructor)element_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_reserved */
    (reprfunc)element_repr, /* tp_repr */
    0, /* tp_as_number */
    &element_as_sequence, /* tp_as_sequence */
    &element_as_mapping, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    (getattrofunc)element_getattro, /* tp_getattro */
    (setattrofunc)element_setattro, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    /* tp_flags */
    0, /* tp_doc */
    (traverseproc)element_gc_traverse, /* tp_traverse */
    (inquiry)element_gc_clear, /* tp_clear */
    0, /* tp_richcompare */
    offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    element_methods, /* tp_methods */
    0, /* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    (initproc)element_init, /* tp_init */
    PyType_GenericAlloc, /* tp_alloc */
    element_new, /* tp_new */
    0, /* tp_free */
};
1792
1793/* ==================================================================== */
1794/* the tree builder type */
1795
/* Instance layout of a TreeBuilder object.  The per-field notes below are
 * the contract stated by this declaration; treebuilder_new() initialises
 * this/last to None, stack to a 20-slot list, index to 0 and every other
 * pointer to NULL.
 */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    ElementObject *this; /* current node */
    ElementObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* value of the optional element_factory constructor argument, or NULL
       when none was given */
    PyObject *element_factory;

    /* element tracing */
    PyObject *events; /* list of events, or NULL if not collecting */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;
1818
/* Forward declaration so the macro and functions below can refer to the
   type object. NOTE(review): the initialised definition is presumably
   further down the file -- confirm. */
static PyTypeObject TreeBuilder_Type;

/* True when op's type is exactly TreeBuilder_Type; instances of subclasses
   do not match. */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001822
1823/* -------------------------------------------------------------------- */
1824/* constructor and destructor */
1825
Eli Bendersky58d548d2012-05-29 15:45:16 +03001826static PyObject *
1827treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001828{
Eli Bendersky58d548d2012-05-29 15:45:16 +03001829 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
1830 if (t != NULL) {
1831 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001832
Eli Bendersky58d548d2012-05-29 15:45:16 +03001833 Py_INCREF(Py_None);
1834 t->this = (ElementObject *)Py_None;
1835 Py_INCREF(Py_None);
1836 t->last = (ElementObject *)Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001837
Eli Bendersky58d548d2012-05-29 15:45:16 +03001838 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03001839 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03001840 t->stack = PyList_New(20);
1841 if (!t->stack) {
1842 Py_DECREF(t->this);
1843 Py_DECREF(t->last);
1844 return NULL;
1845 }
1846 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001847
Eli Bendersky58d548d2012-05-29 15:45:16 +03001848 t->events = NULL;
1849 t->start_event_obj = t->end_event_obj = NULL;
1850 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
1851 }
1852 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001853}
1854
Eli Bendersky58d548d2012-05-29 15:45:16 +03001855static int
1856treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001857{
Eli Bendersky48d358b2012-05-30 17:57:50 +03001858 static char *kwlist[] = {"element_factory", NULL};
1859 PyObject *element_factory = NULL;
1860 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
1861
1862 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
1863 &element_factory)) {
1864 return -1;
1865 }
1866
1867 if (element_factory) {
1868 Py_INCREF(element_factory);
1869 Py_XDECREF(self_tb->element_factory);
1870 self_tb->element_factory = element_factory;
1871 }
1872
Eli Bendersky58d548d2012-05-29 15:45:16 +03001873 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001874}
1875
Eli Bendersky48d358b2012-05-30 17:57:50 +03001876static int
1877treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
1878{
1879 Py_VISIT(self->root);
1880 Py_VISIT(self->this);
1881 Py_VISIT(self->last);
1882 Py_VISIT(self->data);
1883 Py_VISIT(self->stack);
1884 Py_VISIT(self->element_factory);
1885 return 0;
1886}
1887
1888static int
1889treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001890{
1891 Py_XDECREF(self->end_ns_event_obj);
1892 Py_XDECREF(self->start_ns_event_obj);
1893 Py_XDECREF(self->end_event_obj);
1894 Py_XDECREF(self->start_event_obj);
1895 Py_XDECREF(self->events);
1896 Py_DECREF(self->stack);
1897 Py_XDECREF(self->data);
1898 Py_DECREF(self->last);
1899 Py_DECREF(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03001900 Py_CLEAR(self->element_factory);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001901 Py_XDECREF(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03001902 return 0;
1903}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001904
Eli Bendersky48d358b2012-05-30 17:57:50 +03001905static void
1906treebuilder_dealloc(TreeBuilderObject *self)
1907{
1908 PyObject_GC_UnTrack(self);
1909 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03001910 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001911}
1912
1913/* -------------------------------------------------------------------- */
1914/* handlers */
1915
1916LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001917treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1918 PyObject* attrib)
1919{
1920 PyObject* node;
1921 PyObject* this;
1922
1923 if (self->data) {
1924 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001925 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001926 self->last->text = JOIN_SET(
1927 self->data, PyList_CheckExact(self->data)
1928 );
1929 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001930 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001931 self->last->tail = JOIN_SET(
1932 self->data, PyList_CheckExact(self->data)
1933 );
1934 }
1935 self->data = NULL;
1936 }
1937
Eli Bendersky48d358b2012-05-30 17:57:50 +03001938 if (self->element_factory) {
1939 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
1940 } else {
1941 node = create_new_element(tag, attrib);
1942 }
1943 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001944 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03001945 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001946
1947 this = (PyObject*) self->this;
1948
1949 if (this != Py_None) {
1950 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001951 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001952 } else {
1953 if (self->root) {
1954 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001955 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001956 "multiple elements on top level"
1957 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001958 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001959 }
1960 Py_INCREF(node);
1961 self->root = node;
1962 }
1963
1964 if (self->index < PyList_GET_SIZE(self->stack)) {
1965 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001966 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001967 Py_INCREF(this);
1968 } else {
1969 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001970 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001971 }
1972 self->index++;
1973
1974 Py_DECREF(this);
1975 Py_INCREF(node);
1976 self->this = (ElementObject*) node;
1977
1978 Py_DECREF(self->last);
1979 Py_INCREF(node);
1980 self->last = (ElementObject*) node;
1981
1982 if (self->start_event_obj) {
1983 PyObject* res;
1984 PyObject* action = self->start_event_obj;
1985 res = PyTuple_New(2);
1986 if (res) {
1987 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1988 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1989 PyList_Append(self->events, res);
1990 Py_DECREF(res);
1991 } else
1992 PyErr_Clear(); /* FIXME: propagate error */
1993 }
1994
1995 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001996
1997 error:
1998 Py_DECREF(node);
1999 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002000}
2001
2002LOCAL(PyObject*)
2003treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2004{
2005 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002006 if (self->last == (ElementObject*) Py_None) {
2007 /* ignore calls to data before the first call to start */
2008 Py_RETURN_NONE;
2009 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002010 /* store the first item as is */
2011 Py_INCREF(data); self->data = data;
2012 } else {
2013 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002014 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2015 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002016 /* expat often generates single character data sections; handle
2017 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002018 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2019 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002020 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002021 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002022 } else if (PyList_CheckExact(self->data)) {
2023 if (PyList_Append(self->data, data) < 0)
2024 return NULL;
2025 } else {
2026 PyObject* list = PyList_New(2);
2027 if (!list)
2028 return NULL;
2029 PyList_SET_ITEM(list, 0, self->data);
2030 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2031 self->data = list;
2032 }
2033 }
2034
2035 Py_RETURN_NONE;
2036}
2037
2038LOCAL(PyObject*)
2039treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2040{
2041 PyObject* item;
2042
2043 if (self->data) {
2044 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002045 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002046 self->last->text = JOIN_SET(
2047 self->data, PyList_CheckExact(self->data)
2048 );
2049 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002050 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002051 self->last->tail = JOIN_SET(
2052 self->data, PyList_CheckExact(self->data)
2053 );
2054 }
2055 self->data = NULL;
2056 }
2057
2058 if (self->index == 0) {
2059 PyErr_SetString(
2060 PyExc_IndexError,
2061 "pop from empty stack"
2062 );
2063 return NULL;
2064 }
2065
2066 self->index--;
2067
2068 item = PyList_GET_ITEM(self->stack, self->index);
2069 Py_INCREF(item);
2070
2071 Py_DECREF(self->last);
2072
2073 self->last = (ElementObject*) self->this;
2074 self->this = (ElementObject*) item;
2075
2076 if (self->end_event_obj) {
2077 PyObject* res;
2078 PyObject* action = self->end_event_obj;
2079 PyObject* node = (PyObject*) self->last;
2080 res = PyTuple_New(2);
2081 if (res) {
2082 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
2083 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
2084 PyList_Append(self->events, res);
2085 Py_DECREF(res);
2086 } else
2087 PyErr_Clear(); /* FIXME: propagate error */
2088 }
2089
2090 Py_INCREF(self->last);
2091 return (PyObject*) self->last;
2092}
2093
2094LOCAL(void)
2095treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002096 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002097{
2098 PyObject* res;
2099 PyObject* action;
2100 PyObject* parcel;
2101
2102 if (!self->events)
2103 return;
2104
2105 if (start) {
2106 if (!self->start_ns_event_obj)
2107 return;
2108 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002109 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002110 if (!parcel)
2111 return;
2112 Py_INCREF(action);
2113 } else {
2114 if (!self->end_ns_event_obj)
2115 return;
2116 action = self->end_ns_event_obj;
2117 Py_INCREF(action);
2118 parcel = Py_None;
2119 Py_INCREF(parcel);
2120 }
2121
2122 res = PyTuple_New(2);
2123
2124 if (res) {
2125 PyTuple_SET_ITEM(res, 0, action);
2126 PyTuple_SET_ITEM(res, 1, parcel);
2127 PyList_Append(self->events, res);
2128 Py_DECREF(res);
2129 } else
2130 PyErr_Clear(); /* FIXME: propagate error */
2131}
2132
2133/* -------------------------------------------------------------------- */
2134/* methods (in alphabetical order) */
2135
2136static PyObject*
2137treebuilder_data(TreeBuilderObject* self, PyObject* args)
2138{
2139 PyObject* data;
2140 if (!PyArg_ParseTuple(args, "O:data", &data))
2141 return NULL;
2142
2143 return treebuilder_handle_data(self, data);
2144}
2145
2146static PyObject*
2147treebuilder_end(TreeBuilderObject* self, PyObject* args)
2148{
2149 PyObject* tag;
2150 if (!PyArg_ParseTuple(args, "O:end", &tag))
2151 return NULL;
2152
2153 return treebuilder_handle_end(self, tag);
2154}
2155
2156LOCAL(PyObject*)
2157treebuilder_done(TreeBuilderObject* self)
2158{
2159 PyObject* res;
2160
2161 /* FIXME: check stack size? */
2162
2163 if (self->root)
2164 res = self->root;
2165 else
2166 res = Py_None;
2167
2168 Py_INCREF(res);
2169 return res;
2170}
2171
2172static PyObject*
2173treebuilder_close(TreeBuilderObject* self, PyObject* args)
2174{
2175 if (!PyArg_ParseTuple(args, ":close"))
2176 return NULL;
2177
2178 return treebuilder_done(self);
2179}
2180
2181static PyObject*
2182treebuilder_start(TreeBuilderObject* self, PyObject* args)
2183{
2184 PyObject* tag;
2185 PyObject* attrib = Py_None;
2186 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2187 return NULL;
2188
2189 return treebuilder_handle_start(self, tag, attrib);
2190}
2191
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002192static PyMethodDef treebuilder_methods[] = {
2193 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2194 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2195 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002196 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2197 {NULL, NULL}
2198};
2199
Neal Norwitz227b5332006-03-22 09:28:35 +00002200static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002201 PyVarObject_HEAD_INIT(NULL, 0)
2202 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002203 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002204 (destructor)treebuilder_dealloc, /* tp_dealloc */
2205 0, /* tp_print */
2206 0, /* tp_getattr */
2207 0, /* tp_setattr */
2208 0, /* tp_reserved */
2209 0, /* tp_repr */
2210 0, /* tp_as_number */
2211 0, /* tp_as_sequence */
2212 0, /* tp_as_mapping */
2213 0, /* tp_hash */
2214 0, /* tp_call */
2215 0, /* tp_str */
2216 0, /* tp_getattro */
2217 0, /* tp_setattro */
2218 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002219 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2220 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002221 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002222 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2223 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002224 0, /* tp_richcompare */
2225 0, /* tp_weaklistoffset */
2226 0, /* tp_iter */
2227 0, /* tp_iternext */
2228 treebuilder_methods, /* tp_methods */
2229 0, /* tp_members */
2230 0, /* tp_getset */
2231 0, /* tp_base */
2232 0, /* tp_dict */
2233 0, /* tp_descr_get */
2234 0, /* tp_descr_set */
2235 0, /* tp_dictoffset */
2236 (initproc)treebuilder_init, /* tp_init */
2237 PyType_GenericAlloc, /* tp_alloc */
2238 treebuilder_new, /* tp_new */
2239 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002240};
2241
2242/* ==================================================================== */
2243/* the expat interface */
2244
2245#if defined(USE_EXPAT)
2246
2247#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002248#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002249static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002250#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002251
Eli Bendersky52467b12012-06-01 07:13:08 +03002252static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2253 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2254
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002255typedef struct {
2256 PyObject_HEAD
2257
2258 XML_Parser parser;
2259
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002260 PyObject *target;
2261 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002262
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002263 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002264
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002265 PyObject *handle_start;
2266 PyObject *handle_data;
2267 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002268
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002269 PyObject *handle_comment;
2270 PyObject *handle_pi;
2271 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002272
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002273 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002274
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002275} XMLParserObject;
2276
Neal Norwitz227b5332006-03-22 09:28:35 +00002277static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002278
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002279#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2280
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002281/* helpers */
2282
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002283LOCAL(PyObject*)
2284makeuniversal(XMLParserObject* self, const char* string)
2285{
2286 /* convert a UTF-8 tag/attribute name from the expat parser
2287 to a universal name string */
2288
2289 int size = strlen(string);
2290 PyObject* key;
2291 PyObject* value;
2292
2293 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002294 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002295 if (!key)
2296 return NULL;
2297
2298 value = PyDict_GetItem(self->names, key);
2299
2300 if (value) {
2301 Py_INCREF(value);
2302 } else {
2303 /* new name. convert to universal name, and decode as
2304 necessary */
2305
2306 PyObject* tag;
2307 char* p;
2308 int i;
2309
2310 /* look for namespace separator */
2311 for (i = 0; i < size; i++)
2312 if (string[i] == '}')
2313 break;
2314 if (i != size) {
2315 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002316 tag = PyBytes_FromStringAndSize(NULL, size+1);
2317 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002318 p[0] = '{';
2319 memcpy(p+1, string, size);
2320 size++;
2321 } else {
2322 /* plain name; use key as tag */
2323 Py_INCREF(key);
2324 tag = key;
2325 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002326
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002327 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002328 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002329 value = PyUnicode_DecodeUTF8(p, size, "strict");
2330 Py_DECREF(tag);
2331 if (!value) {
2332 Py_DECREF(key);
2333 return NULL;
2334 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002335
2336 /* add to names dictionary */
2337 if (PyDict_SetItem(self->names, key, value) < 0) {
2338 Py_DECREF(key);
2339 Py_DECREF(value);
2340 return NULL;
2341 }
2342 }
2343
2344 Py_DECREF(key);
2345 return value;
2346}
2347
Eli Bendersky5b77d812012-03-16 08:20:05 +02002348/* Set the ParseError exception with the given parameters.
2349 * If message is not NULL, it's used as the error string. Otherwise, the
2350 * message string is the default for the given error_code.
2351*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002352static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002353expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002354{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002355 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002356
Victor Stinner499dfcf2011-03-21 13:26:24 +01002357 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002358 message ? message : EXPAT(ErrorString)(error_code),
2359 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002360 if (errmsg == NULL)
2361 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002362
Victor Stinner499dfcf2011-03-21 13:26:24 +01002363 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2364 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002365 if (!error)
2366 return;
2367
Eli Bendersky5b77d812012-03-16 08:20:05 +02002368 /* Add code and position attributes */
2369 code = PyLong_FromLong((long)error_code);
2370 if (!code) {
2371 Py_DECREF(error);
2372 return;
2373 }
2374 if (PyObject_SetAttrString(error, "code", code) == -1) {
2375 Py_DECREF(error);
2376 Py_DECREF(code);
2377 return;
2378 }
2379 Py_DECREF(code);
2380
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002381 position = Py_BuildValue("(ii)", line, column);
2382 if (!position) {
2383 Py_DECREF(error);
2384 return;
2385 }
2386 if (PyObject_SetAttrString(error, "position", position) == -1) {
2387 Py_DECREF(error);
2388 Py_DECREF(position);
2389 return;
2390 }
2391 Py_DECREF(position);
2392
2393 PyErr_SetObject(elementtree_parseerror_obj, error);
2394 Py_DECREF(error);
2395}
2396
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002397/* -------------------------------------------------------------------- */
2398/* handlers */
2399
2400static void
2401expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2402 int data_len)
2403{
2404 PyObject* key;
2405 PyObject* value;
2406 PyObject* res;
2407
2408 if (data_len < 2 || data_in[0] != '&')
2409 return;
2410
Neal Norwitz0269b912007-08-08 06:56:02 +00002411 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002412 if (!key)
2413 return;
2414
2415 value = PyDict_GetItem(self->entity, key);
2416
2417 if (value) {
2418 if (TreeBuilder_CheckExact(self->target))
2419 res = treebuilder_handle_data(
2420 (TreeBuilderObject*) self->target, value
2421 );
2422 else if (self->handle_data)
2423 res = PyObject_CallFunction(self->handle_data, "O", value);
2424 else
2425 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002426 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002427 } else if (!PyErr_Occurred()) {
2428 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002429 char message[128] = "undefined entity ";
2430 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002431 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002432 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002433 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002434 EXPAT(GetErrorColumnNumber)(self->parser),
2435 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002436 );
2437 }
2438
2439 Py_DECREF(key);
2440}
2441
2442static void
2443expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2444 const XML_Char **attrib_in)
2445{
2446 PyObject* res;
2447 PyObject* tag;
2448 PyObject* attrib;
2449 int ok;
2450
2451 /* tag name */
2452 tag = makeuniversal(self, tag_in);
2453 if (!tag)
2454 return; /* parser will look for errors */
2455
2456 /* attributes */
2457 if (attrib_in[0]) {
2458 attrib = PyDict_New();
2459 if (!attrib)
2460 return;
2461 while (attrib_in[0] && attrib_in[1]) {
2462 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002463 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002464 if (!key || !value) {
2465 Py_XDECREF(value);
2466 Py_XDECREF(key);
2467 Py_DECREF(attrib);
2468 return;
2469 }
2470 ok = PyDict_SetItem(attrib, key, value);
2471 Py_DECREF(value);
2472 Py_DECREF(key);
2473 if (ok < 0) {
2474 Py_DECREF(attrib);
2475 return;
2476 }
2477 attrib_in += 2;
2478 }
2479 } else {
2480 Py_INCREF(Py_None);
2481 attrib = Py_None;
2482 }
2483
Eli Bendersky48d358b2012-05-30 17:57:50 +03002484 /* If we get None, pass an empty dictionary on */
2485 if (attrib == Py_None) {
2486 Py_DECREF(attrib);
2487 attrib = PyDict_New();
2488 if (!attrib)
2489 return;
2490 }
2491
2492 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002493 /* shortcut */
2494 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2495 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002496 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002497 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002498 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002499 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002500 res = NULL;
2501
2502 Py_DECREF(tag);
2503 Py_DECREF(attrib);
2504
2505 Py_XDECREF(res);
2506}
2507
2508static void
2509expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2510 int data_len)
2511{
2512 PyObject* data;
2513 PyObject* res;
2514
Neal Norwitz0269b912007-08-08 06:56:02 +00002515 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002516 if (!data)
2517 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002518
2519 if (TreeBuilder_CheckExact(self->target))
2520 /* shortcut */
2521 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2522 else if (self->handle_data)
2523 res = PyObject_CallFunction(self->handle_data, "O", data);
2524 else
2525 res = NULL;
2526
2527 Py_DECREF(data);
2528
2529 Py_XDECREF(res);
2530}
2531
2532static void
2533expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2534{
2535 PyObject* tag;
2536 PyObject* res = NULL;
2537
2538 if (TreeBuilder_CheckExact(self->target))
2539 /* shortcut */
2540 /* the standard tree builder doesn't look at the end tag */
2541 res = treebuilder_handle_end(
2542 (TreeBuilderObject*) self->target, Py_None
2543 );
2544 else if (self->handle_end) {
2545 tag = makeuniversal(self, tag_in);
2546 if (tag) {
2547 res = PyObject_CallFunction(self->handle_end, "O", tag);
2548 Py_DECREF(tag);
2549 }
2550 }
2551
2552 Py_XDECREF(res);
2553}
2554
2555static void
2556expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2557 const XML_Char *uri)
2558{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002559 PyObject* sprefix = NULL;
2560 PyObject* suri = NULL;
2561
2562 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2563 if (!suri)
2564 return;
2565
2566 if (prefix)
2567 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2568 else
2569 sprefix = PyUnicode_FromString("");
2570 if (!sprefix) {
2571 Py_DECREF(suri);
2572 return;
2573 }
2574
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002575 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002576 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002577 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002578
2579 Py_DECREF(sprefix);
2580 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002581}
2582
2583static void
2584expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2585{
2586 treebuilder_handle_namespace(
2587 (TreeBuilderObject*) self->target, 0, NULL, NULL
2588 );
2589}
2590
2591static void
2592expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2593{
2594 PyObject* comment;
2595 PyObject* res;
2596
2597 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002598 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002599 if (comment) {
2600 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2601 Py_XDECREF(res);
2602 Py_DECREF(comment);
2603 }
2604 }
2605}
2606
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002607static void
2608expat_start_doctype_handler(XMLParserObject *self,
2609 const XML_Char *doctype_name,
2610 const XML_Char *sysid,
2611 const XML_Char *pubid,
2612 int has_internal_subset)
2613{
2614 PyObject *self_pyobj = (PyObject *)self;
2615 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
2616 PyObject *parser_doctype = NULL;
2617 PyObject *res = NULL;
2618
2619 doctype_name_obj = makeuniversal(self, doctype_name);
2620 if (!doctype_name_obj)
2621 return;
2622
2623 if (sysid) {
2624 sysid_obj = makeuniversal(self, sysid);
2625 if (!sysid_obj) {
2626 Py_DECREF(doctype_name_obj);
2627 return;
2628 }
2629 } else {
2630 Py_INCREF(Py_None);
2631 sysid_obj = Py_None;
2632 }
2633
2634 if (pubid) {
2635 pubid_obj = makeuniversal(self, pubid);
2636 if (!pubid_obj) {
2637 Py_DECREF(doctype_name_obj);
2638 Py_DECREF(sysid_obj);
2639 return;
2640 }
2641 } else {
2642 Py_INCREF(Py_None);
2643 pubid_obj = Py_None;
2644 }
2645
2646 /* If the target has a handler for doctype, call it. */
2647 if (self->handle_doctype) {
2648 res = PyObject_CallFunction(self->handle_doctype, "OOO",
2649 doctype_name_obj, pubid_obj, sysid_obj);
2650 Py_CLEAR(res);
2651 }
2652
2653 /* Now see if the parser itself has a doctype method. If yes and it's
2654 * a subclass, call it but warn about deprecation. If it's not a subclass
2655 * (i.e. vanilla XMLParser), do nothing.
2656 */
2657 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
2658 if (parser_doctype) {
2659 if (!XMLParser_CheckExact(self_pyobj)) {
2660 if (PyErr_WarnEx(PyExc_DeprecationWarning,
2661 "This method of XMLParser is deprecated. Define"
2662 " doctype() method on the TreeBuilder target.",
2663 1) < 0) {
2664 goto clear;
2665 }
2666 res = PyObject_CallFunction(parser_doctype, "OOO",
2667 doctype_name_obj, pubid_obj, sysid_obj);
2668 Py_CLEAR(res);
2669 }
2670 }
2671
2672clear:
2673 Py_XDECREF(parser_doctype);
2674 Py_DECREF(doctype_name_obj);
2675 Py_DECREF(pubid_obj);
2676 Py_DECREF(sysid_obj);
2677}
2678
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002679static void
2680expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2681 const XML_Char* data_in)
2682{
2683 PyObject* target;
2684 PyObject* data;
2685 PyObject* res;
2686
2687 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002688 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2689 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002690 if (target && data) {
2691 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2692 Py_XDECREF(res);
2693 Py_DECREF(data);
2694 Py_DECREF(target);
2695 } else {
2696 Py_XDECREF(data);
2697 Py_XDECREF(target);
2698 }
2699 }
2700}
2701
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002702static int
2703expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2704 XML_Encoding *info)
2705{
2706 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707 unsigned char s[256];
2708 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002709 void *data;
2710 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711
2712 memset(info, 0, sizeof(XML_Encoding));
2713
2714 for (i = 0; i < 256; i++)
2715 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002716
Fredrik Lundhc3389992005-12-25 11:40:19 +00002717 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718 if (!u)
2719 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002720 if (PyUnicode_READY(u))
2721 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002723 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002724 Py_DECREF(u);
2725 return XML_STATUS_ERROR;
2726 }
2727
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002728 kind = PyUnicode_KIND(u);
2729 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002730 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002731 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2732 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2733 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002735 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736 }
2737
2738 Py_DECREF(u);
2739
2740 return XML_STATUS_OK;
2741}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742
2743/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744
Eli Bendersky52467b12012-06-01 07:13:08 +03002745static PyObject *
2746xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002747{
Eli Bendersky52467b12012-06-01 07:13:08 +03002748 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
2749 if (self) {
2750 self->parser = NULL;
2751 self->target = self->entity = self->names = NULL;
2752 self->handle_start = self->handle_data = self->handle_end = NULL;
2753 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002754 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002755 }
Eli Bendersky52467b12012-06-01 07:13:08 +03002756 return (PyObject *)self;
2757}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002758
Eli Bendersky52467b12012-06-01 07:13:08 +03002759static int
2760xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
2761{
2762 XMLParserObject *self_xp = (XMLParserObject *)self;
2763 PyObject *target = NULL, *html = NULL;
2764 char *encoding = NULL;
2765 static char *kwlist[] = {"html", "target", "encoding"};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002766
Eli Bendersky52467b12012-06-01 07:13:08 +03002767 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
2768 &html, &target, &encoding)) {
2769 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002770 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002771
Eli Bendersky52467b12012-06-01 07:13:08 +03002772 self_xp->entity = PyDict_New();
2773 if (!self_xp->entity)
2774 return -1;
2775
2776 self_xp->names = PyDict_New();
2777 if (!self_xp->names) {
2778 Py_XDECREF(self_xp->entity);
2779 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002780 }
2781
Eli Bendersky52467b12012-06-01 07:13:08 +03002782 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
2783 if (!self_xp->parser) {
2784 Py_XDECREF(self_xp->entity);
2785 Py_XDECREF(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002786 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03002787 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002788 }
2789
Eli Bendersky52467b12012-06-01 07:13:08 +03002790 if (target) {
2791 Py_INCREF(target);
2792 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03002793 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002794 if (!target) {
Eli Bendersky52467b12012-06-01 07:13:08 +03002795 Py_XDECREF(self_xp->entity);
2796 Py_XDECREF(self_xp->names);
2797 EXPAT(ParserFree)(self_xp->parser);
2798 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002799 }
Eli Bendersky52467b12012-06-01 07:13:08 +03002800 }
2801 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002802
Eli Bendersky52467b12012-06-01 07:13:08 +03002803 self_xp->handle_start = PyObject_GetAttrString(target, "start");
2804 self_xp->handle_data = PyObject_GetAttrString(target, "data");
2805 self_xp->handle_end = PyObject_GetAttrString(target, "end");
2806 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
2807 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
2808 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002809 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002810
2811 PyErr_Clear();
Eli Bendersky52467b12012-06-01 07:13:08 +03002812
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002813 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03002814 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002815 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002816 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002817 (XML_StartElementHandler) expat_start_handler,
2818 (XML_EndElementHandler) expat_end_handler
2819 );
2820 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002821 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002822 (XML_DefaultHandler) expat_default_handler
2823 );
2824 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002825 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002826 (XML_CharacterDataHandler) expat_data_handler
2827 );
Eli Bendersky52467b12012-06-01 07:13:08 +03002828 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002829 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002830 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002831 (XML_CommentHandler) expat_comment_handler
2832 );
Eli Bendersky52467b12012-06-01 07:13:08 +03002833 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002834 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002835 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002836 (XML_ProcessingInstructionHandler) expat_pi_handler
2837 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002838 EXPAT(SetStartDoctypeDeclHandler)(
2839 self_xp->parser,
2840 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
2841 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002842 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03002843 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002844 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2845 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002846
Eli Bendersky52467b12012-06-01 07:13:08 +03002847 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002848}
2849
Eli Bendersky52467b12012-06-01 07:13:08 +03002850static int
2851xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
2852{
2853 Py_VISIT(self->handle_close);
2854 Py_VISIT(self->handle_pi);
2855 Py_VISIT(self->handle_comment);
2856 Py_VISIT(self->handle_end);
2857 Py_VISIT(self->handle_data);
2858 Py_VISIT(self->handle_start);
2859
2860 Py_VISIT(self->target);
2861 Py_VISIT(self->entity);
2862 Py_VISIT(self->names);
2863
2864 return 0;
2865}
2866
2867static int
2868xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002869{
2870 EXPAT(ParserFree)(self->parser);
2871
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002872 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002873 Py_XDECREF(self->handle_pi);
2874 Py_XDECREF(self->handle_comment);
2875 Py_XDECREF(self->handle_end);
2876 Py_XDECREF(self->handle_data);
2877 Py_XDECREF(self->handle_start);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002878 Py_XDECREF(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002879
Eli Bendersky52467b12012-06-01 07:13:08 +03002880 Py_XDECREF(self->target);
2881 Py_XDECREF(self->entity);
2882 Py_XDECREF(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002883
Eli Bendersky52467b12012-06-01 07:13:08 +03002884 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002885}
2886
Eli Bendersky52467b12012-06-01 07:13:08 +03002887static void
2888xmlparser_dealloc(XMLParserObject* self)
2889{
2890 PyObject_GC_UnTrack(self);
2891 xmlparser_gc_clear(self);
2892 Py_TYPE(self)->tp_free((PyObject *)self);
2893}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002894
2895LOCAL(PyObject*)
2896expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2897{
2898 int ok;
2899
2900 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2901
2902 if (PyErr_Occurred())
2903 return NULL;
2904
2905 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002906 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002907 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002908 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002909 EXPAT(GetErrorColumnNumber)(self->parser),
2910 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002911 );
2912 return NULL;
2913 }
2914
2915 Py_RETURN_NONE;
2916}
2917
2918static PyObject*
2919xmlparser_close(XMLParserObject* self, PyObject* args)
2920{
2921 /* end feeding data to parser */
2922
2923 PyObject* res;
2924 if (!PyArg_ParseTuple(args, ":close"))
2925 return NULL;
2926
2927 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002928 if (!res)
2929 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002930
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002931 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002932 Py_DECREF(res);
2933 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002934 } if (self->handle_close) {
2935 Py_DECREF(res);
2936 return PyObject_CallFunction(self->handle_close, "");
2937 } else
2938 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002939}
2940
2941static PyObject*
2942xmlparser_feed(XMLParserObject* self, PyObject* args)
2943{
2944 /* feed data to parser */
2945
2946 char* data;
2947 int data_len;
2948 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2949 return NULL;
2950
2951 return expat_parse(self, data, data_len, 0);
2952}
2953
2954static PyObject*
2955xmlparser_parse(XMLParserObject* self, PyObject* args)
2956{
2957 /* (internal) parse until end of input stream */
2958
2959 PyObject* reader;
2960 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02002961 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002962 PyObject* res;
2963
2964 PyObject* fileobj;
2965 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2966 return NULL;
2967
2968 reader = PyObject_GetAttrString(fileobj, "read");
2969 if (!reader)
2970 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002971
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002972 /* read from open file object */
2973 for (;;) {
2974
2975 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2976
2977 if (!buffer) {
2978 /* read failed (e.g. due to KeyboardInterrupt) */
2979 Py_DECREF(reader);
2980 return NULL;
2981 }
2982
Eli Benderskyf996e772012-03-16 05:53:30 +02002983 if (PyUnicode_CheckExact(buffer)) {
2984 /* A unicode object is encoded into bytes using UTF-8 */
2985 if (PyUnicode_GET_SIZE(buffer) == 0) {
2986 Py_DECREF(buffer);
2987 break;
2988 }
2989 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
2990 if (!temp) {
2991 /* Propagate exception from PyUnicode_AsEncodedString */
2992 Py_DECREF(buffer);
2993 Py_DECREF(reader);
2994 return NULL;
2995 }
2996
2997 /* Here we no longer need the original buffer since it contains
2998 * unicode. Make it point to the encoded bytes object.
2999 */
3000 Py_DECREF(buffer);
3001 buffer = temp;
3002 }
3003 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003004 Py_DECREF(buffer);
3005 break;
3006 }
3007
3008 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003009 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003010 );
3011
3012 Py_DECREF(buffer);
3013
3014 if (!res) {
3015 Py_DECREF(reader);
3016 return NULL;
3017 }
3018 Py_DECREF(res);
3019
3020 }
3021
3022 Py_DECREF(reader);
3023
3024 res = expat_parse(self, "", 0, 1);
3025
3026 if (res && TreeBuilder_CheckExact(self->target)) {
3027 Py_DECREF(res);
3028 return treebuilder_done((TreeBuilderObject*) self->target);
3029 }
3030
3031 return res;
3032}
3033
3034static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003035xmlparser_doctype(XMLParserObject *self, PyObject *args)
3036{
3037 Py_RETURN_NONE;
3038}
3039
3040static PyObject*
3041xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003042{
3043 /* activate element event reporting */
3044
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003045 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003046 TreeBuilderObject* target;
3047
3048 PyObject* events; /* event collector */
3049 PyObject* event_set = Py_None;
3050 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
3051 &event_set))
3052 return NULL;
3053
3054 if (!TreeBuilder_CheckExact(self->target)) {
3055 PyErr_SetString(
3056 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003057 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003058 "targets"
3059 );
3060 return NULL;
3061 }
3062
3063 target = (TreeBuilderObject*) self->target;
3064
3065 Py_INCREF(events);
3066 Py_XDECREF(target->events);
3067 target->events = events;
3068
3069 /* clear out existing events */
3070 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
3071 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
3072 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
3073 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
3074
3075 if (event_set == Py_None) {
3076 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003077 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003078 Py_RETURN_NONE;
3079 }
3080
3081 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3082 goto error;
3083
3084 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3085 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3086 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003087 if (PyUnicode_Check(item)) {
3088 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003089 if (event == NULL)
3090 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003091 } else if (PyBytes_Check(item))
3092 event = PyBytes_AS_STRING(item);
3093 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003094 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003095 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003096 if (strcmp(event, "start") == 0) {
3097 Py_INCREF(item);
3098 target->start_event_obj = item;
3099 } else if (strcmp(event, "end") == 0) {
3100 Py_INCREF(item);
3101 Py_XDECREF(target->end_event_obj);
3102 target->end_event_obj = item;
3103 } else if (strcmp(event, "start-ns") == 0) {
3104 Py_INCREF(item);
3105 Py_XDECREF(target->start_ns_event_obj);
3106 target->start_ns_event_obj = item;
3107 EXPAT(SetNamespaceDeclHandler)(
3108 self->parser,
3109 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3110 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3111 );
3112 } else if (strcmp(event, "end-ns") == 0) {
3113 Py_INCREF(item);
3114 Py_XDECREF(target->end_ns_event_obj);
3115 target->end_ns_event_obj = item;
3116 EXPAT(SetNamespaceDeclHandler)(
3117 self->parser,
3118 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3119 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3120 );
3121 } else {
3122 PyErr_Format(
3123 PyExc_ValueError,
3124 "unknown event '%s'", event
3125 );
3126 return NULL;
3127 }
3128 }
3129
3130 Py_RETURN_NONE;
3131
3132 error:
3133 PyErr_SetString(
3134 PyExc_TypeError,
3135 "invalid event tuple"
3136 );
3137 return NULL;
3138}
3139
3140static PyMethodDef xmlparser_methods[] = {
3141 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
3142 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3143 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3144 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003145 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003146 {NULL, NULL}
3147};
3148
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003149static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003150xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003151{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003152 if (PyUnicode_Check(nameobj)) {
3153 PyObject* res;
3154 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3155 res = self->entity;
3156 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3157 res = self->target;
3158 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3159 return PyUnicode_FromFormat(
3160 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003161 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003162 }
3163 else
3164 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003165
Alexander Belopolskye239d232010-12-08 23:31:48 +00003166 Py_INCREF(res);
3167 return res;
3168 }
3169 generic:
3170 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003171}
3172
Neal Norwitz227b5332006-03-22 09:28:35 +00003173static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003174 PyVarObject_HEAD_INIT(NULL, 0)
3175 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003176 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003177 (destructor)xmlparser_dealloc, /* tp_dealloc */
3178 0, /* tp_print */
3179 0, /* tp_getattr */
3180 0, /* tp_setattr */
3181 0, /* tp_reserved */
3182 0, /* tp_repr */
3183 0, /* tp_as_number */
3184 0, /* tp_as_sequence */
3185 0, /* tp_as_mapping */
3186 0, /* tp_hash */
3187 0, /* tp_call */
3188 0, /* tp_str */
3189 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3190 0, /* tp_setattro */
3191 0, /* tp_as_buffer */
3192 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3193 /* tp_flags */
3194 0, /* tp_doc */
3195 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3196 (inquiry)xmlparser_gc_clear, /* tp_clear */
3197 0, /* tp_richcompare */
3198 0, /* tp_weaklistoffset */
3199 0, /* tp_iter */
3200 0, /* tp_iternext */
3201 xmlparser_methods, /* tp_methods */
3202 0, /* tp_members */
3203 0, /* tp_getset */
3204 0, /* tp_base */
3205 0, /* tp_dict */
3206 0, /* tp_descr_get */
3207 0, /* tp_descr_set */
3208 0, /* tp_dictoffset */
3209 (initproc)xmlparser_init, /* tp_init */
3210 PyType_GenericAlloc, /* tp_alloc */
3211 xmlparser_new, /* tp_new */
3212 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003213};
3214
3215#endif
3216
3217/* ==================================================================== */
3218/* python module interface */
3219
3220static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003221 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003222 {NULL, NULL}
3223};
3224
Martin v. Löwis1a214512008-06-11 05:26:20 +00003225
3226static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003227 PyModuleDef_HEAD_INIT,
3228 "_elementtree",
3229 NULL,
3230 -1,
3231 _functions,
3232 NULL,
3233 NULL,
3234 NULL,
3235 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003236};
3237
Neal Norwitzf6657e62006-12-28 04:47:50 +00003238PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003239PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003240{
Eli Bendersky828efde2012-04-05 05:40:58 +03003241 PyObject *m, *g, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003242 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003243
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003244 /* Initialize object types */
3245 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003246 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003247 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003248 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003249#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003250 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003251 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003252#endif
3253
Martin v. Löwis1a214512008-06-11 05:26:20 +00003254 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003255 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003256 return NULL;
3257
3258 /* The code below requires that the module gets already added
3259 to sys.modules. */
3260 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003261 _elementtreemodule.m_name,
3262 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003263
3264 /* python glue code */
3265
3266 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003267 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003268 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003269
3270 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
3271
3272 bootstrap = (
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003273 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274 " if tag == '*':\n"
3275 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276 " if tag is None or node.tag == tag:\n"
3277 " yield node\n"
3278 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003279 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003280 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003281
3282 "def itertext(node):\n" /* helper */
3283 " if node.text:\n"
3284 " yield node.text\n"
3285 " for e in node:\n"
3286 " for s in e.itertext():\n"
3287 " yield s\n"
3288 " if e.tail:\n"
3289 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003291 );
3292
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003293 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3294 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003295
Eli Bendersky828efde2012-04-05 05:40:58 +03003296 if (!(temp = PyImport_ImportModule("copy")))
3297 return NULL;
3298 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3299 Py_XDECREF(temp);
3300
3301 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3302 return NULL;
3303
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003304 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3305 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003306
Eli Bendersky20d41742012-06-01 09:48:37 +03003307 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003308 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3309 if (expat_capi) {
3310 /* check that it's usable */
3311 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3312 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3313 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3314 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003315 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003316 expat_capi = NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003317 }
3318 }
3319 if (!expat_capi) {
3320 PyErr_SetString(
3321 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
3322 );
3323 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003324 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003325
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003326 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003327 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003328 );
3329 Py_INCREF(elementtree_parseerror_obj);
3330 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3331
Eli Bendersky092af1f2012-03-04 07:14:03 +02003332 Py_INCREF((PyObject *)&Element_Type);
3333 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3334
Eli Bendersky58d548d2012-05-29 15:45:16 +03003335 Py_INCREF((PyObject *)&TreeBuilder_Type);
3336 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3337
Eli Bendersky52467b12012-06-01 07:13:08 +03003338#if defined(USE_EXPAT)
3339 Py_INCREF((PyObject *)&XMLParser_Type);
3340 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3341#endif
3342
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003343 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003344}