blob: 12607fae84ca2d5eb0fc8e05e1076ca6e6a8a0b5 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000053#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054
55/* -------------------------------------------------------------------- */
56/* configuration */
57
58/* Leave defined to include the expat-based XMLParser type */
59#define USE_EXPAT
60
Florent Xiclunaf15351d2010-03-13 23:24:31 +000061/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000062/* #define USE_PYEXPAT_CAPI */
63
64/* An element can hold this many children without extra memory
65 allocations. */
66#define STATIC_CHILDREN 4
67
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
72
73/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010074 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000075 that the number of children should be an even number, at least on
76 32-bit platforms. */
77
78/* -------------------------------------------------------------------- */
79
/* Allocation tracing hooks.  Flip the condition below to 1 to print a
   running byte total on every ALLOC/RELEASE; in the normal build both
   macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
90
/* LOCAL(type) declares a file-private helper returning `type`.  Under
   MSVC it additionally requests inlining and the fastcall convention. */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
97
/* The text and tail slots keep a 'join' flag in the lowest bit of the
   stored object pointer.  Every use of text or tail as an object
   pointer must therefore go through JOIN_OBJ.  See the comments in the
   ElementObject definition for more info.

   JOIN_GET(p)        -> the flag bit (0 or 1)
   JOIN_SET(p, flag)  -> p with its flag bit cleared, then or-ed with flag
   JOIN_OBJ(p)        -> p with the flag bit cleared, as a PyObject* */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t) 1))
105
106/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000107static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000108static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000109static PyObject* elementtree_iter_obj;
110static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000111static PyObject* elementpath_obj;
112
113/* helpers */
114
115LOCAL(PyObject*)
116deepcopy(PyObject* object, PyObject* memo)
117{
118 /* do a deep copy of the given object */
119
120 PyObject* args;
121 PyObject* result;
122
123 if (!elementtree_deepcopy_obj) {
124 PyErr_SetString(
125 PyExc_RuntimeError,
126 "deepcopy helper not found"
127 );
128 return NULL;
129 }
130
131 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000132 if (!args)
133 return NULL;
134
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
136 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
137
138 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
139
140 Py_DECREF(args);
141
142 return result;
143}
144
145LOCAL(PyObject*)
146list_join(PyObject* list)
147{
148 /* join list elements (destroying the list in the process) */
149
150 PyObject* joiner;
151 PyObject* function;
152 PyObject* args;
153 PyObject* result;
154
155 switch (PyList_GET_SIZE(list)) {
156 case 0:
157 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000158 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159 case 1:
160 result = PyList_GET_ITEM(list, 0);
161 Py_INCREF(result);
162 Py_DECREF(list);
163 return result;
164 }
165
166 /* two or more elements: slice out a suitable separator from the
167 first member, and use that to join the entire list */
168
169 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
170 if (!joiner)
171 return NULL;
172
173 function = PyObject_GetAttrString(joiner, "join");
174 if (!function) {
175 Py_DECREF(joiner);
176 return NULL;
177 }
178
179 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000180 if (!args)
181 return NULL;
182
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183 PyTuple_SET_ITEM(args, 0, list);
184
185 result = PyObject_CallObject(function, args);
186
187 Py_DECREF(args); /* also removes list */
188 Py_DECREF(function);
189 Py_DECREF(joiner);
190
191 return result;
192}
193
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000194/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200195/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000196
197typedef struct {
198
199 /* attributes (a dictionary object), or None if no attributes */
200 PyObject* attrib;
201
202 /* child elements */
203 int length; /* actual number of items */
204 int allocated; /* allocated items */
205
206 /* this either points to _children or to a malloced buffer */
207 PyObject* *children;
208
209 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100210
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000211} ElementObjectExtra;
212
213typedef struct {
214 PyObject_HEAD
215
216 /* element tag (a string). */
217 PyObject* tag;
218
219 /* text before first child. note that this is a tagged pointer;
220 use JOIN_OBJ to get the object pointer. the join flag is used
221 to distinguish lists created by the tree builder from lists
222 assigned to the attribute by application code; the former
223 should be joined before being returned to the user, the latter
224 should be left intact. */
225 PyObject* text;
226
227 /* text after this element, in parent. note that this is a tagged
228 pointer; use JOIN_OBJ to get the object pointer. */
229 PyObject* tail;
230
231 ElementObjectExtra* extra;
232
Eli Benderskyebf37a22012-04-03 22:02:37 +0300233 PyObject *weakreflist; /* For tp_weaklistoffset */
234
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235} ElementObject;
236
Neal Norwitz227b5332006-03-22 09:28:35 +0000237static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000238
Christian Heimes90aa7642007-12-19 02:45:37 +0000239#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000240
241/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200242/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000243
244LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200245create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246{
247 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
248 if (!self->extra)
249 return -1;
250
251 if (!attrib)
252 attrib = Py_None;
253
254 Py_INCREF(attrib);
255 self->extra->attrib = attrib;
256
257 self->extra->length = 0;
258 self->extra->allocated = STATIC_CHILDREN;
259 self->extra->children = self->extra->_children;
260
261 return 0;
262}
263
264LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200265dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266{
Eli Bendersky08b85292012-04-04 15:55:07 +0300267 ElementObjectExtra *myextra;
268 int i;
269
Eli Benderskyebf37a22012-04-03 22:02:37 +0300270 if (!self->extra)
271 return;
272
273 /* Avoid DECREFs calling into this code again (cycles, etc.)
274 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300275 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300276 self->extra = NULL;
277
278 Py_DECREF(myextra->attrib);
279
Eli Benderskyebf37a22012-04-03 22:02:37 +0300280 for (i = 0; i < myextra->length; i++)
281 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282
Eli Benderskyebf37a22012-04-03 22:02:37 +0300283 if (myextra->children != myextra->_children)
284 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285
Eli Benderskyebf37a22012-04-03 22:02:37 +0300286 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287}
288
Eli Bendersky092af1f2012-03-04 07:14:03 +0200289/* Convenience internal function to create new Element objects with the given
290 * tag and attributes.
291*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000292LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200293create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000294{
295 ElementObject* self;
296
Eli Bendersky0192ba32012-03-30 16:38:33 +0300297 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000298 if (self == NULL)
299 return NULL;
300
301 /* use None for empty dictionaries */
302 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
303 attrib = Py_None;
304
305 self->extra = NULL;
306
307 if (attrib != Py_None) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200308 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000309 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000310 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000311 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000312 }
313
314 Py_INCREF(tag);
315 self->tag = tag;
316
317 Py_INCREF(Py_None);
318 self->text = Py_None;
319
320 Py_INCREF(Py_None);
321 self->tail = Py_None;
322
Eli Benderskyebf37a22012-04-03 22:02:37 +0300323 self->weakreflist = NULL;
324
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000325 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300326 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000327 return (PyObject*) self;
328}
329
Eli Bendersky092af1f2012-03-04 07:14:03 +0200330static PyObject *
331element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
332{
333 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
334 if (e != NULL) {
335 Py_INCREF(Py_None);
336 e->tag = Py_None;
337
338 Py_INCREF(Py_None);
339 e->text = Py_None;
340
341 Py_INCREF(Py_None);
342 e->tail = Py_None;
343
344 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300345 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200346 }
347 return (PyObject *)e;
348}
349
Eli Bendersky737b1732012-05-29 06:02:56 +0300350/* Helper function for extracting the attrib dictionary from a keywords dict.
351 * This is required by some constructors/functions in this module that can
352 * either accept attrib as a keyword argument or all attributes splashed
353 * directly into *kwds.
354 * If there is no 'attrib' keyword, return an empty dict.
355 */
356static PyObject*
357get_attrib_from_keywords(PyObject *kwds)
358{
359 PyObject *attrib_str = PyUnicode_FromString("attrib");
360 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
361
362 if (attrib) {
363 /* If attrib was found in kwds, copy its value and remove it from
364 * kwds
365 */
366 if (!PyDict_Check(attrib)) {
367 Py_DECREF(attrib_str);
368 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
369 Py_TYPE(attrib)->tp_name);
370 return NULL;
371 }
372 attrib = PyDict_Copy(attrib);
373 PyDict_DelItem(kwds, attrib_str);
374 } else {
375 attrib = PyDict_New();
376 }
377
378 Py_DECREF(attrib_str);
379
380 if (attrib)
381 PyDict_Update(attrib, kwds);
382 return attrib;
383}
384
Eli Bendersky092af1f2012-03-04 07:14:03 +0200385static int
386element_init(PyObject *self, PyObject *args, PyObject *kwds)
387{
388 PyObject *tag;
389 PyObject *tmp;
390 PyObject *attrib = NULL;
391 ElementObject *self_elem;
392
393 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
394 return -1;
395
Eli Bendersky737b1732012-05-29 06:02:56 +0300396 if (attrib) {
397 /* attrib passed as positional arg */
398 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 if (!attrib)
400 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300401 if (kwds) {
402 if (PyDict_Update(attrib, kwds) < 0) {
403 return -1;
404 }
405 }
406 } else if (kwds) {
407 /* have keywords args */
408 attrib = get_attrib_from_keywords(kwds);
409 if (!attrib)
410 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200411 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300412 /* no attrib arg, no kwds, so no attributes */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 Py_INCREF(Py_None);
414 attrib = Py_None;
415 }
416
417 self_elem = (ElementObject *)self;
418
419 /* Use None for empty dictionaries */
420 if (PyDict_CheckExact(attrib) && PyDict_Size(attrib) == 0) {
421 Py_INCREF(Py_None);
422 attrib = Py_None;
423 }
424
425 if (attrib != Py_None) {
426 if (create_extra(self_elem, attrib) < 0) {
427 PyObject_Del(self_elem);
428 return -1;
429 }
430 }
431
432 /* If create_extra needed attrib, it took a reference to it, so we can
433 * release ours anyway.
434 */
435 Py_DECREF(attrib);
436
437 /* Replace the objects already pointed to by tag, text and tail. */
438 tmp = self_elem->tag;
439 self_elem->tag = tag;
440 Py_INCREF(tag);
441 Py_DECREF(tmp);
442
443 tmp = self_elem->text;
444 self_elem->text = Py_None;
445 Py_INCREF(Py_None);
446 Py_DECREF(JOIN_OBJ(tmp));
447
448 tmp = self_elem->tail;
449 self_elem->tail = Py_None;
450 Py_INCREF(Py_None);
451 Py_DECREF(JOIN_OBJ(tmp));
452
453 return 0;
454}
455
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456LOCAL(int)
457element_resize(ElementObject* self, int extra)
458{
459 int size;
460 PyObject* *children;
461
462 /* make sure self->children can hold the given number of extra
463 elements. set an exception and return -1 if allocation failed */
464
465 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200466 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000467
468 size = self->extra->length + extra;
469
470 if (size > self->extra->allocated) {
471 /* use Python 2.4's list growth strategy */
472 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000473 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100474 * which needs at least 4 bytes.
475 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000476 * be safe.
477 */
478 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000479 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000480 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100481 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000482 * false alarm always assume at least one child to be safe.
483 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000484 children = PyObject_Realloc(self->extra->children,
485 size * sizeof(PyObject*));
486 if (!children)
487 goto nomemory;
488 } else {
489 children = PyObject_Malloc(size * sizeof(PyObject*));
490 if (!children)
491 goto nomemory;
492 /* copy existing children from static area to malloc buffer */
493 memcpy(children, self->extra->children,
494 self->extra->length * sizeof(PyObject*));
495 }
496 self->extra->children = children;
497 self->extra->allocated = size;
498 }
499
500 return 0;
501
502 nomemory:
503 PyErr_NoMemory();
504 return -1;
505}
506
507LOCAL(int)
508element_add_subelement(ElementObject* self, PyObject* element)
509{
510 /* add a child element to a parent */
511
512 if (element_resize(self, 1) < 0)
513 return -1;
514
515 Py_INCREF(element);
516 self->extra->children[self->extra->length] = element;
517
518 self->extra->length++;
519
520 return 0;
521}
522
523LOCAL(PyObject*)
524element_get_attrib(ElementObject* self)
525{
526 /* return borrowed reference to attrib dictionary */
527 /* note: this function assumes that the extra section exists */
528
529 PyObject* res = self->extra->attrib;
530
531 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000532 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000533 /* create missing dictionary */
534 res = PyDict_New();
535 if (!res)
536 return NULL;
537 self->extra->attrib = res;
538 }
539
540 return res;
541}
542
543LOCAL(PyObject*)
544element_get_text(ElementObject* self)
545{
546 /* return borrowed reference to text attribute */
547
548 PyObject* res = self->text;
549
550 if (JOIN_GET(res)) {
551 res = JOIN_OBJ(res);
552 if (PyList_CheckExact(res)) {
553 res = list_join(res);
554 if (!res)
555 return NULL;
556 self->text = res;
557 }
558 }
559
560 return res;
561}
562
563LOCAL(PyObject*)
564element_get_tail(ElementObject* self)
565{
566 /* return borrowed reference to text attribute */
567
568 PyObject* res = self->tail;
569
570 if (JOIN_GET(res)) {
571 res = JOIN_OBJ(res);
572 if (PyList_CheckExact(res)) {
573 res = list_join(res);
574 if (!res)
575 return NULL;
576 self->tail = res;
577 }
578 }
579
580 return res;
581}
582
583static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300584subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585{
586 PyObject* elem;
587
588 ElementObject* parent;
589 PyObject* tag;
590 PyObject* attrib = NULL;
591 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
592 &Element_Type, &parent, &tag,
593 &PyDict_Type, &attrib))
594 return NULL;
595
Eli Bendersky737b1732012-05-29 06:02:56 +0300596 if (attrib) {
597 /* attrib passed as positional arg */
598 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599 if (!attrib)
600 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300601 if (kwds) {
602 if (PyDict_Update(attrib, kwds) < 0) {
603 return NULL;
604 }
605 }
606 } else if (kwds) {
607 /* have keyword args */
608 attrib = get_attrib_from_keywords(kwds);
609 if (!attrib)
610 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000611 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300612 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000613 Py_INCREF(Py_None);
614 attrib = Py_None;
615 }
616
Eli Bendersky092af1f2012-03-04 07:14:03 +0200617 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000618
619 Py_DECREF(attrib);
620
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000621 if (element_add_subelement(parent, elem) < 0) {
622 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000623 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000624 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625
626 return elem;
627}
628
Eli Bendersky0192ba32012-03-30 16:38:33 +0300629static int
630element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
631{
632 Py_VISIT(self->tag);
633 Py_VISIT(JOIN_OBJ(self->text));
634 Py_VISIT(JOIN_OBJ(self->tail));
635
636 if (self->extra) {
637 int i;
638 Py_VISIT(self->extra->attrib);
639
640 for (i = 0; i < self->extra->length; ++i)
641 Py_VISIT(self->extra->children[i]);
642 }
643 return 0;
644}
645
646static int
647element_gc_clear(ElementObject *self)
648{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300649 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300650
651 /* The following is like Py_CLEAR for self->text and self->tail, but
652 * written explicitily because the real pointers hide behind access
653 * macros.
654 */
655 if (self->text) {
656 PyObject *tmp = JOIN_OBJ(self->text);
657 self->text = NULL;
658 Py_DECREF(tmp);
659 }
660
661 if (self->tail) {
662 PyObject *tmp = JOIN_OBJ(self->tail);
663 self->tail = NULL;
664 Py_DECREF(tmp);
665 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300666
667 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300668 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300669 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300670 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300671 return 0;
672}
673
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000674static void
675element_dealloc(ElementObject* self)
676{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300677 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300678
679 if (self->weakreflist != NULL)
680 PyObject_ClearWeakRefs((PyObject *) self);
681
Eli Bendersky0192ba32012-03-30 16:38:33 +0300682 /* element_gc_clear clears all references and deallocates extra
683 */
684 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000685
686 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200687 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000688}
689
690/* -------------------------------------------------------------------- */
691/* methods (in alphabetical order) */
692
693static PyObject*
694element_append(ElementObject* self, PyObject* args)
695{
696 PyObject* element;
697 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
698 return NULL;
699
700 if (element_add_subelement(self, element) < 0)
701 return NULL;
702
703 Py_RETURN_NONE;
704}
705
706static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300707element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000708{
709 if (!PyArg_ParseTuple(args, ":clear"))
710 return NULL;
711
Eli Benderskyebf37a22012-04-03 22:02:37 +0300712 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713
714 Py_INCREF(Py_None);
715 Py_DECREF(JOIN_OBJ(self->text));
716 self->text = Py_None;
717
718 Py_INCREF(Py_None);
719 Py_DECREF(JOIN_OBJ(self->tail));
720 self->tail = Py_None;
721
722 Py_RETURN_NONE;
723}
724
725static PyObject*
726element_copy(ElementObject* self, PyObject* args)
727{
728 int i;
729 ElementObject* element;
730
731 if (!PyArg_ParseTuple(args, ":__copy__"))
732 return NULL;
733
Eli Bendersky092af1f2012-03-04 07:14:03 +0200734 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000735 self->tag, (self->extra) ? self->extra->attrib : Py_None
736 );
737 if (!element)
738 return NULL;
739
740 Py_DECREF(JOIN_OBJ(element->text));
741 element->text = self->text;
742 Py_INCREF(JOIN_OBJ(element->text));
743
744 Py_DECREF(JOIN_OBJ(element->tail));
745 element->tail = self->tail;
746 Py_INCREF(JOIN_OBJ(element->tail));
747
748 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100749
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000750 if (element_resize(element, self->extra->length) < 0) {
751 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000752 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000753 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000754
755 for (i = 0; i < self->extra->length; i++) {
756 Py_INCREF(self->extra->children[i]);
757 element->extra->children[i] = self->extra->children[i];
758 }
759
760 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100761
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762 }
763
764 return (PyObject*) element;
765}
766
767static PyObject*
768element_deepcopy(ElementObject* self, PyObject* args)
769{
770 int i;
771 ElementObject* element;
772 PyObject* tag;
773 PyObject* attrib;
774 PyObject* text;
775 PyObject* tail;
776 PyObject* id;
777
778 PyObject* memo;
779 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
780 return NULL;
781
782 tag = deepcopy(self->tag, memo);
783 if (!tag)
784 return NULL;
785
786 if (self->extra) {
787 attrib = deepcopy(self->extra->attrib, memo);
788 if (!attrib) {
789 Py_DECREF(tag);
790 return NULL;
791 }
792 } else {
793 Py_INCREF(Py_None);
794 attrib = Py_None;
795 }
796
Eli Bendersky092af1f2012-03-04 07:14:03 +0200797 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000798
799 Py_DECREF(tag);
800 Py_DECREF(attrib);
801
802 if (!element)
803 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100804
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805 text = deepcopy(JOIN_OBJ(self->text), memo);
806 if (!text)
807 goto error;
808 Py_DECREF(element->text);
809 element->text = JOIN_SET(text, JOIN_GET(self->text));
810
811 tail = deepcopy(JOIN_OBJ(self->tail), memo);
812 if (!tail)
813 goto error;
814 Py_DECREF(element->tail);
815 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
816
817 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100818
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000819 if (element_resize(element, self->extra->length) < 0)
820 goto error;
821
822 for (i = 0; i < self->extra->length; i++) {
823 PyObject* child = deepcopy(self->extra->children[i], memo);
824 if (!child) {
825 element->extra->length = i;
826 goto error;
827 }
828 element->extra->children[i] = child;
829 }
830
831 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100832
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000833 }
834
835 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000836 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000837 if (!id)
838 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000839
840 i = PyDict_SetItem(memo, id, (PyObject*) element);
841
842 Py_DECREF(id);
843
844 if (i < 0)
845 goto error;
846
847 return (PyObject*) element;
848
849 error:
850 Py_DECREF(element);
851 return NULL;
852}
853
854LOCAL(int)
855checkpath(PyObject* tag)
856{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000857 Py_ssize_t i;
858 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000859
860 /* check if a tag contains an xpath character */
861
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000862#define PATHCHAR(ch) \
863 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000864
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000865 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200866 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
867 void *data = PyUnicode_DATA(tag);
868 unsigned int kind = PyUnicode_KIND(tag);
869 for (i = 0; i < len; i++) {
870 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
871 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000872 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200873 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000874 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200875 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000876 return 1;
877 }
878 return 0;
879 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000880 if (PyBytes_Check(tag)) {
881 char *p = PyBytes_AS_STRING(tag);
882 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000883 if (p[i] == '{')
884 check = 0;
885 else if (p[i] == '}')
886 check = 1;
887 else if (check && PATHCHAR(p[i]))
888 return 1;
889 }
890 return 0;
891 }
892
893 return 1; /* unknown type; might be path expression */
894}
895
896static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000897element_extend(ElementObject* self, PyObject* args)
898{
899 PyObject* seq;
900 Py_ssize_t i, seqlen = 0;
901
902 PyObject* seq_in;
903 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
904 return NULL;
905
906 seq = PySequence_Fast(seq_in, "");
907 if (!seq) {
908 PyErr_Format(
909 PyExc_TypeError,
910 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
911 );
912 return NULL;
913 }
914
915 seqlen = PySequence_Size(seq);
916 for (i = 0; i < seqlen; i++) {
917 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +0200918 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
919 Py_DECREF(seq);
920 PyErr_Format(
921 PyExc_TypeError,
922 "expected an Element, not \"%.200s\"",
923 Py_TYPE(element)->tp_name);
924 return NULL;
925 }
926
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000927 if (element_add_subelement(self, element) < 0) {
928 Py_DECREF(seq);
929 return NULL;
930 }
931 }
932
933 Py_DECREF(seq);
934
935 Py_RETURN_NONE;
936}
937
938static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300939element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000940{
941 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000942 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000943 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +0300944 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200945
Eli Bendersky737b1732012-05-29 06:02:56 +0300946 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
947 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000948 return NULL;
949
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200950 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200951 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200952 return _PyObject_CallMethodId(
953 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000954 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200955 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000956
957 if (!self->extra)
958 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100959
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000960 for (i = 0; i < self->extra->length; i++) {
961 PyObject* item = self->extra->children[i];
962 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000963 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000964 Py_INCREF(item);
965 return item;
966 }
967 }
968
969 Py_RETURN_NONE;
970}
971
972static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300973element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000974{
975 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000976 PyObject* tag;
977 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000978 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200979 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +0300980 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200981
Eli Bendersky737b1732012-05-29 06:02:56 +0300982 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
983 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000984 return NULL;
985
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000986 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200987 return _PyObject_CallMethodId(
988 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000989 );
990
991 if (!self->extra) {
992 Py_INCREF(default_value);
993 return default_value;
994 }
995
996 for (i = 0; i < self->extra->length; i++) {
997 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000998 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
999
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001000 PyObject* text = element_get_text(item);
1001 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +00001002 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001003 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001004 return text;
1005 }
1006 }
1007
1008 Py_INCREF(default_value);
1009 return default_value;
1010}
1011
1012static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001013element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001014{
1015 int i;
1016 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001017 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001018 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001019 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001020
Eli Bendersky737b1732012-05-29 06:02:56 +03001021 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1022 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001023 return NULL;
1024
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001025 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001026 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001027 return _PyObject_CallMethodId(
1028 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001029 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001030 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001031
1032 out = PyList_New(0);
1033 if (!out)
1034 return NULL;
1035
1036 if (!self->extra)
1037 return out;
1038
1039 for (i = 0; i < self->extra->length; i++) {
1040 PyObject* item = self->extra->children[i];
1041 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001042 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001043 if (PyList_Append(out, item) < 0) {
1044 Py_DECREF(out);
1045 return NULL;
1046 }
1047 }
1048 }
1049
1050 return out;
1051}
1052
1053static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001054element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001055{
1056 PyObject* tag;
1057 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001058 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001059 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001060
Eli Bendersky737b1732012-05-29 06:02:56 +03001061 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1062 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001063 return NULL;
1064
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001065 return _PyObject_CallMethodId(
1066 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001067 );
1068}
1069
1070static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001071element_get(ElementObject* self, PyObject* args)
1072{
1073 PyObject* value;
1074
1075 PyObject* key;
1076 PyObject* default_value = Py_None;
1077 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
1078 return NULL;
1079
1080 if (!self->extra || self->extra->attrib == Py_None)
1081 value = default_value;
1082 else {
1083 value = PyDict_GetItem(self->extra->attrib, key);
1084 if (!value)
1085 value = default_value;
1086 }
1087
1088 Py_INCREF(value);
1089 return value;
1090}
1091
1092static PyObject*
1093element_getchildren(ElementObject* self, PyObject* args)
1094{
1095 int i;
1096 PyObject* list;
1097
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001098 /* FIXME: report as deprecated? */
1099
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001100 if (!PyArg_ParseTuple(args, ":getchildren"))
1101 return NULL;
1102
1103 if (!self->extra)
1104 return PyList_New(0);
1105
1106 list = PyList_New(self->extra->length);
1107 if (!list)
1108 return NULL;
1109
1110 for (i = 0; i < self->extra->length; i++) {
1111 PyObject* item = self->extra->children[i];
1112 Py_INCREF(item);
1113 PyList_SET_ITEM(list, i, item);
1114 }
1115
1116 return list;
1117}
1118
1119static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001120element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001121{
1122 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001123
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001124 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001125 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001126 return NULL;
1127
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001128 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001129 PyErr_SetString(
1130 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001131 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001132 );
1133 return NULL;
1134 }
1135
1136 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001137 if (!args)
1138 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001139
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001140 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1141 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1142
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001143 result = PyObject_CallObject(elementtree_iter_obj, args);
1144
1145 Py_DECREF(args);
1146
1147 return result;
1148}
1149
1150
1151static PyObject*
1152element_itertext(ElementObject* self, PyObject* args)
1153{
1154 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001155
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001156 if (!PyArg_ParseTuple(args, ":itertext"))
1157 return NULL;
1158
1159 if (!elementtree_itertext_obj) {
1160 PyErr_SetString(
1161 PyExc_RuntimeError,
1162 "itertext helper not found"
1163 );
1164 return NULL;
1165 }
1166
1167 args = PyTuple_New(1);
1168 if (!args)
1169 return NULL;
1170
1171 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1172
1173 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001174
1175 Py_DECREF(args);
1176
1177 return result;
1178}
1179
1180static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001181element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001183 ElementObject* self = (ElementObject*) self_;
1184
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001185 if (!self->extra || index < 0 || index >= self->extra->length) {
1186 PyErr_SetString(
1187 PyExc_IndexError,
1188 "child index out of range"
1189 );
1190 return NULL;
1191 }
1192
1193 Py_INCREF(self->extra->children[index]);
1194 return self->extra->children[index];
1195}
1196
1197static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001198element_insert(ElementObject* self, PyObject* args)
1199{
1200 int i;
1201
1202 int index;
1203 PyObject* element;
1204 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1205 &Element_Type, &element))
1206 return NULL;
1207
1208 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001209 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001210
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001211 if (index < 0) {
1212 index += self->extra->length;
1213 if (index < 0)
1214 index = 0;
1215 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001216 if (index > self->extra->length)
1217 index = self->extra->length;
1218
1219 if (element_resize(self, 1) < 0)
1220 return NULL;
1221
1222 for (i = self->extra->length; i > index; i--)
1223 self->extra->children[i] = self->extra->children[i-1];
1224
1225 Py_INCREF(element);
1226 self->extra->children[index] = element;
1227
1228 self->extra->length++;
1229
1230 Py_RETURN_NONE;
1231}
1232
1233static PyObject*
1234element_items(ElementObject* self, PyObject* args)
1235{
1236 if (!PyArg_ParseTuple(args, ":items"))
1237 return NULL;
1238
1239 if (!self->extra || self->extra->attrib == Py_None)
1240 return PyList_New(0);
1241
1242 return PyDict_Items(self->extra->attrib);
1243}
1244
1245static PyObject*
1246element_keys(ElementObject* self, PyObject* args)
1247{
1248 if (!PyArg_ParseTuple(args, ":keys"))
1249 return NULL;
1250
1251 if (!self->extra || self->extra->attrib == Py_None)
1252 return PyList_New(0);
1253
1254 return PyDict_Keys(self->extra->attrib);
1255}
1256
Martin v. Löwis18e16552006-02-15 17:27:45 +00001257static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001258element_length(ElementObject* self)
1259{
1260 if (!self->extra)
1261 return 0;
1262
1263 return self->extra->length;
1264}
1265
1266static PyObject*
1267element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1268{
1269 PyObject* elem;
1270
1271 PyObject* tag;
1272 PyObject* attrib;
1273 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1274 return NULL;
1275
1276 attrib = PyDict_Copy(attrib);
1277 if (!attrib)
1278 return NULL;
1279
Eli Bendersky092af1f2012-03-04 07:14:03 +02001280 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001281
1282 Py_DECREF(attrib);
1283
1284 return elem;
1285}
1286
1287static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001288element_remove(ElementObject* self, PyObject* args)
1289{
1290 int i;
1291
1292 PyObject* element;
1293 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1294 return NULL;
1295
1296 if (!self->extra) {
1297 /* element has no children, so raise exception */
1298 PyErr_SetString(
1299 PyExc_ValueError,
1300 "list.remove(x): x not in list"
1301 );
1302 return NULL;
1303 }
1304
1305 for (i = 0; i < self->extra->length; i++) {
1306 if (self->extra->children[i] == element)
1307 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001308 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001309 break;
1310 }
1311
1312 if (i == self->extra->length) {
1313 /* element is not in children, so raise exception */
1314 PyErr_SetString(
1315 PyExc_ValueError,
1316 "list.remove(x): x not in list"
1317 );
1318 return NULL;
1319 }
1320
1321 Py_DECREF(self->extra->children[i]);
1322
1323 self->extra->length--;
1324
1325 for (; i < self->extra->length; i++)
1326 self->extra->children[i] = self->extra->children[i+1];
1327
1328 Py_RETURN_NONE;
1329}
1330
1331static PyObject*
1332element_repr(ElementObject* self)
1333{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001334 if (self->tag)
1335 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1336 else
1337 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001338}
1339
1340static PyObject*
1341element_set(ElementObject* self, PyObject* args)
1342{
1343 PyObject* attrib;
1344
1345 PyObject* key;
1346 PyObject* value;
1347 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1348 return NULL;
1349
1350 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001351 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001352
1353 attrib = element_get_attrib(self);
1354 if (!attrib)
1355 return NULL;
1356
1357 if (PyDict_SetItem(attrib, key, value) < 0)
1358 return NULL;
1359
1360 Py_RETURN_NONE;
1361}
1362
1363static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001364element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001365{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001366 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001367 int i;
1368 PyObject* old;
1369
1370 if (!self->extra || index < 0 || index >= self->extra->length) {
1371 PyErr_SetString(
1372 PyExc_IndexError,
1373 "child assignment index out of range");
1374 return -1;
1375 }
1376
1377 old = self->extra->children[index];
1378
1379 if (item) {
1380 Py_INCREF(item);
1381 self->extra->children[index] = item;
1382 } else {
1383 self->extra->length--;
1384 for (i = index; i < self->extra->length; i++)
1385 self->extra->children[i] = self->extra->children[i+1];
1386 }
1387
1388 Py_DECREF(old);
1389
1390 return 0;
1391}
1392
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001393static PyObject*
1394element_subscr(PyObject* self_, PyObject* item)
1395{
1396 ElementObject* self = (ElementObject*) self_;
1397
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001398 if (PyIndex_Check(item)) {
1399 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001400
1401 if (i == -1 && PyErr_Occurred()) {
1402 return NULL;
1403 }
1404 if (i < 0 && self->extra)
1405 i += self->extra->length;
1406 return element_getitem(self_, i);
1407 }
1408 else if (PySlice_Check(item)) {
1409 Py_ssize_t start, stop, step, slicelen, cur, i;
1410 PyObject* list;
1411
1412 if (!self->extra)
1413 return PyList_New(0);
1414
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001415 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001416 self->extra->length,
1417 &start, &stop, &step, &slicelen) < 0) {
1418 return NULL;
1419 }
1420
1421 if (slicelen <= 0)
1422 return PyList_New(0);
1423 else {
1424 list = PyList_New(slicelen);
1425 if (!list)
1426 return NULL;
1427
1428 for (cur = start, i = 0; i < slicelen;
1429 cur += step, i++) {
1430 PyObject* item = self->extra->children[cur];
1431 Py_INCREF(item);
1432 PyList_SET_ITEM(list, i, item);
1433 }
1434
1435 return list;
1436 }
1437 }
1438 else {
1439 PyErr_SetString(PyExc_TypeError,
1440 "element indices must be integers");
1441 return NULL;
1442 }
1443}
1444
1445static int
1446element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1447{
1448 ElementObject* self = (ElementObject*) self_;
1449
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001450 if (PyIndex_Check(item)) {
1451 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001452
1453 if (i == -1 && PyErr_Occurred()) {
1454 return -1;
1455 }
1456 if (i < 0 && self->extra)
1457 i += self->extra->length;
1458 return element_setitem(self_, i, value);
1459 }
1460 else if (PySlice_Check(item)) {
1461 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1462
1463 PyObject* recycle = NULL;
1464 PyObject* seq = NULL;
1465
1466 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001467 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001468
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001469 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001470 self->extra->length,
1471 &start, &stop, &step, &slicelen) < 0) {
1472 return -1;
1473 }
1474
Eli Bendersky865756a2012-03-09 13:38:15 +02001475 if (value == NULL) {
1476 /* Delete slice */
1477 size_t cur;
1478 Py_ssize_t i;
1479
1480 if (slicelen <= 0)
1481 return 0;
1482
1483 /* Since we're deleting, the direction of the range doesn't matter,
1484 * so for simplicity make it always ascending.
1485 */
1486 if (step < 0) {
1487 stop = start + 1;
1488 start = stop + step * (slicelen - 1) - 1;
1489 step = -step;
1490 }
1491
1492 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1493
1494 /* recycle is a list that will contain all the children
1495 * scheduled for removal.
1496 */
1497 if (!(recycle = PyList_New(slicelen))) {
1498 PyErr_NoMemory();
1499 return -1;
1500 }
1501
1502 /* This loop walks over all the children that have to be deleted,
1503 * with cur pointing at them. num_moved is the amount of children
1504 * until the next deleted child that have to be "shifted down" to
1505 * occupy the deleted's places.
1506 * Note that in the ith iteration, shifting is done i+i places down
1507 * because i children were already removed.
1508 */
1509 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1510 /* Compute how many children have to be moved, clipping at the
1511 * list end.
1512 */
1513 Py_ssize_t num_moved = step - 1;
1514 if (cur + step >= (size_t)self->extra->length) {
1515 num_moved = self->extra->length - cur - 1;
1516 }
1517
1518 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1519
1520 memmove(
1521 self->extra->children + cur - i,
1522 self->extra->children + cur + 1,
1523 num_moved * sizeof(PyObject *));
1524 }
1525
1526 /* Leftover "tail" after the last removed child */
1527 cur = start + (size_t)slicelen * step;
1528 if (cur < (size_t)self->extra->length) {
1529 memmove(
1530 self->extra->children + cur - slicelen,
1531 self->extra->children + cur,
1532 (self->extra->length - cur) * sizeof(PyObject *));
1533 }
1534
1535 self->extra->length -= slicelen;
1536
1537 /* Discard the recycle list with all the deleted sub-elements */
1538 Py_XDECREF(recycle);
1539 return 0;
1540 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001541 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001542 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001543 seq = PySequence_Fast(value, "");
1544 if (!seq) {
1545 PyErr_Format(
1546 PyExc_TypeError,
1547 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1548 );
1549 return -1;
1550 }
1551 newlen = PySequence_Size(seq);
1552 }
1553
1554 if (step != 1 && newlen != slicelen)
1555 {
1556 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001557 "attempt to assign sequence of size %zd "
1558 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001559 newlen, slicelen
1560 );
1561 return -1;
1562 }
1563
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001564 /* Resize before creating the recycle bin, to prevent refleaks. */
1565 if (newlen > slicelen) {
1566 if (element_resize(self, newlen - slicelen) < 0) {
1567 if (seq) {
1568 Py_DECREF(seq);
1569 }
1570 return -1;
1571 }
1572 }
1573
1574 if (slicelen > 0) {
1575 /* to avoid recursive calls to this method (via decref), move
1576 old items to the recycle bin here, and get rid of them when
1577 we're done modifying the element */
1578 recycle = PyList_New(slicelen);
1579 if (!recycle) {
1580 if (seq) {
1581 Py_DECREF(seq);
1582 }
1583 return -1;
1584 }
1585 for (cur = start, i = 0; i < slicelen;
1586 cur += step, i++)
1587 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1588 }
1589
1590 if (newlen < slicelen) {
1591 /* delete slice */
1592 for (i = stop; i < self->extra->length; i++)
1593 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1594 } else if (newlen > slicelen) {
1595 /* insert slice */
1596 for (i = self->extra->length-1; i >= stop; i--)
1597 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1598 }
1599
1600 /* replace the slice */
1601 for (cur = start, i = 0; i < newlen;
1602 cur += step, i++) {
1603 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1604 Py_INCREF(element);
1605 self->extra->children[cur] = element;
1606 }
1607
1608 self->extra->length += newlen - slicelen;
1609
1610 if (seq) {
1611 Py_DECREF(seq);
1612 }
1613
1614 /* discard the recycle bin, and everything in it */
1615 Py_XDECREF(recycle);
1616
1617 return 0;
1618 }
1619 else {
1620 PyErr_SetString(PyExc_TypeError,
1621 "element indices must be integers");
1622 return -1;
1623 }
1624}
1625
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001626static PyMethodDef element_methods[] = {
1627
Eli Bendersky0192ba32012-03-30 16:38:33 +03001628 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001629
1630 {"get", (PyCFunction) element_get, METH_VARARGS},
1631 {"set", (PyCFunction) element_set, METH_VARARGS},
1632
Eli Bendersky737b1732012-05-29 06:02:56 +03001633 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1634 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1635 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001636
1637 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001638 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001639 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1640 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1641
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001642 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1643 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001644 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001645
1646 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001647 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1648
1649 {"items", (PyCFunction) element_items, METH_VARARGS},
1650 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1651
1652 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1653
1654 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1655 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1656
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001657 {NULL, NULL}
1658};
1659
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001660static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001661element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001662{
1663 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001664 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001665
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001666 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001667 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001668
Alexander Belopolskye239d232010-12-08 23:31:48 +00001669 if (name == NULL)
1670 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001671
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001672 /* handle common attributes first */
1673 if (strcmp(name, "tag") == 0) {
1674 res = self->tag;
1675 Py_INCREF(res);
1676 return res;
1677 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001678 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001679 Py_INCREF(res);
1680 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001681 }
1682
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001683 /* methods */
1684 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1685 if (res)
1686 return res;
1687
1688 /* less common attributes */
1689 if (strcmp(name, "tail") == 0) {
1690 PyErr_Clear();
1691 res = element_get_tail(self);
1692 } else if (strcmp(name, "attrib") == 0) {
1693 PyErr_Clear();
1694 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001695 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001696 res = element_get_attrib(self);
1697 }
1698
1699 if (!res)
1700 return NULL;
1701
1702 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001703 return res;
1704}
1705
Eli Benderskyb20df952012-05-20 06:33:29 +03001706static PyObject*
1707element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001708{
Eli Benderskyb20df952012-05-20 06:33:29 +03001709 char *name = "";
1710 if (PyUnicode_Check(nameobj))
1711 name = _PyUnicode_AsString(nameobj);
1712
1713 if (name == NULL)
1714 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001715
1716 if (strcmp(name, "tag") == 0) {
1717 Py_DECREF(self->tag);
1718 self->tag = value;
1719 Py_INCREF(self->tag);
1720 } else if (strcmp(name, "text") == 0) {
1721 Py_DECREF(JOIN_OBJ(self->text));
1722 self->text = value;
1723 Py_INCREF(self->text);
1724 } else if (strcmp(name, "tail") == 0) {
1725 Py_DECREF(JOIN_OBJ(self->tail));
1726 self->tail = value;
1727 Py_INCREF(self->tail);
1728 } else if (strcmp(name, "attrib") == 0) {
1729 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001730 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001731 Py_DECREF(self->extra->attrib);
1732 self->extra->attrib = value;
1733 Py_INCREF(self->extra->attrib);
1734 } else {
1735 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001736 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001737 }
1738
Eli Benderskyb20df952012-05-20 06:33:29 +03001739 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001740}
1741
1742static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001743 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001744 0, /* sq_concat */
1745 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001746 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001747 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001748 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001749 0,
1750};
1751
1752static PyMappingMethods element_as_mapping = {
1753 (lenfunc) element_length,
1754 (binaryfunc) element_subscr,
1755 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001756};
1757
Neal Norwitz227b5332006-03-22 09:28:35 +00001758static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001759 PyVarObject_HEAD_INIT(NULL, 0)
1760 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001761 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001762 (destructor)element_dealloc, /* tp_dealloc */
1763 0, /* tp_print */
1764 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001765 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001766 0, /* tp_reserved */
1767 (reprfunc)element_repr, /* tp_repr */
1768 0, /* tp_as_number */
1769 &element_as_sequence, /* tp_as_sequence */
1770 &element_as_mapping, /* tp_as_mapping */
1771 0, /* tp_hash */
1772 0, /* tp_call */
1773 0, /* tp_str */
1774 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001775 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001776 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001777 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1778 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001779 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001780 (traverseproc)element_gc_traverse, /* tp_traverse */
1781 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001782 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001783 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001784 0, /* tp_iter */
1785 0, /* tp_iternext */
1786 element_methods, /* tp_methods */
1787 0, /* tp_members */
1788 0, /* tp_getset */
1789 0, /* tp_base */
1790 0, /* tp_dict */
1791 0, /* tp_descr_get */
1792 0, /* tp_descr_set */
1793 0, /* tp_dictoffset */
1794 (initproc)element_init, /* tp_init */
1795 PyType_GenericAlloc, /* tp_alloc */
1796 element_new, /* tp_new */
1797 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001798};
1799
1800/* ==================================================================== */
1801/* the tree builder type */
1802
1803typedef struct {
1804 PyObject_HEAD
1805
Eli Bendersky58d548d2012-05-29 15:45:16 +03001806 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001807
Eli Bendersky58d548d2012-05-29 15:45:16 +03001808 ElementObject *this; /* current node */
1809 ElementObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001810
Eli Bendersky58d548d2012-05-29 15:45:16 +03001811 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001812
Eli Bendersky58d548d2012-05-29 15:45:16 +03001813 PyObject *stack; /* element stack */
1814 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001815
1816 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03001817 PyObject *events; /* list of events, or NULL if not collecting */
1818 PyObject *start_event_obj; /* event objects (NULL to ignore) */
1819 PyObject *end_event_obj;
1820 PyObject *start_ns_event_obj;
1821 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001822} TreeBuilderObject;
1823
Neal Norwitz227b5332006-03-22 09:28:35 +00001824static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001825
Christian Heimes90aa7642007-12-19 02:45:37 +00001826#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001827
1828/* -------------------------------------------------------------------- */
1829/* constructor and destructor */
1830
Eli Bendersky58d548d2012-05-29 15:45:16 +03001831static PyObject *
1832treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001833{
Eli Bendersky58d548d2012-05-29 15:45:16 +03001834 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
1835 if (t != NULL) {
1836 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001837
Eli Bendersky58d548d2012-05-29 15:45:16 +03001838 Py_INCREF(Py_None);
1839 t->this = (ElementObject *)Py_None;
1840 Py_INCREF(Py_None);
1841 t->last = (ElementObject *)Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001842
Eli Bendersky58d548d2012-05-29 15:45:16 +03001843 t->data = NULL;
1844 t->stack = PyList_New(20);
1845 if (!t->stack) {
1846 Py_DECREF(t->this);
1847 Py_DECREF(t->last);
1848 return NULL;
1849 }
1850 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001851
Eli Bendersky58d548d2012-05-29 15:45:16 +03001852 t->events = NULL;
1853 t->start_event_obj = t->end_event_obj = NULL;
1854 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
1855 }
1856 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001857}
1858
Eli Bendersky58d548d2012-05-29 15:45:16 +03001859static int
1860treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001861{
Eli Bendersky58d548d2012-05-29 15:45:16 +03001862 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001863}
1864
1865static void
Eli Bendersky58d548d2012-05-29 15:45:16 +03001866treebuilder_dealloc(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001867{
1868 Py_XDECREF(self->end_ns_event_obj);
1869 Py_XDECREF(self->start_ns_event_obj);
1870 Py_XDECREF(self->end_event_obj);
1871 Py_XDECREF(self->start_event_obj);
1872 Py_XDECREF(self->events);
1873 Py_DECREF(self->stack);
1874 Py_XDECREF(self->data);
1875 Py_DECREF(self->last);
1876 Py_DECREF(self->this);
1877 Py_XDECREF(self->root);
1878
Eli Bendersky58d548d2012-05-29 15:45:16 +03001879 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001880}
1881
1882/* -------------------------------------------------------------------- */
1883/* handlers */
1884
1885LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001886treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1887 PyObject* attrib)
1888{
1889 PyObject* node;
1890 PyObject* this;
1891
1892 if (self->data) {
1893 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001894 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001895 self->last->text = JOIN_SET(
1896 self->data, PyList_CheckExact(self->data)
1897 );
1898 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001899 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001900 self->last->tail = JOIN_SET(
1901 self->data, PyList_CheckExact(self->data)
1902 );
1903 }
1904 self->data = NULL;
1905 }
1906
Eli Bendersky092af1f2012-03-04 07:14:03 +02001907 node = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001908 if (!node)
1909 return NULL;
1910
1911 this = (PyObject*) self->this;
1912
1913 if (this != Py_None) {
1914 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001915 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001916 } else {
1917 if (self->root) {
1918 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001919 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001920 "multiple elements on top level"
1921 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001922 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001923 }
1924 Py_INCREF(node);
1925 self->root = node;
1926 }
1927
1928 if (self->index < PyList_GET_SIZE(self->stack)) {
1929 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001930 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001931 Py_INCREF(this);
1932 } else {
1933 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001934 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001935 }
1936 self->index++;
1937
1938 Py_DECREF(this);
1939 Py_INCREF(node);
1940 self->this = (ElementObject*) node;
1941
1942 Py_DECREF(self->last);
1943 Py_INCREF(node);
1944 self->last = (ElementObject*) node;
1945
1946 if (self->start_event_obj) {
1947 PyObject* res;
1948 PyObject* action = self->start_event_obj;
1949 res = PyTuple_New(2);
1950 if (res) {
1951 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1952 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1953 PyList_Append(self->events, res);
1954 Py_DECREF(res);
1955 } else
1956 PyErr_Clear(); /* FIXME: propagate error */
1957 }
1958
1959 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001960
1961 error:
1962 Py_DECREF(node);
1963 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001964}
1965
1966LOCAL(PyObject*)
1967treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1968{
1969 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001970 if (self->last == (ElementObject*) Py_None) {
1971 /* ignore calls to data before the first call to start */
1972 Py_RETURN_NONE;
1973 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001974 /* store the first item as is */
1975 Py_INCREF(data); self->data = data;
1976 } else {
1977 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001978 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1979 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001980 /* expat often generates single character data sections; handle
1981 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001982 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1983 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001984 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001985 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001986 } else if (PyList_CheckExact(self->data)) {
1987 if (PyList_Append(self->data, data) < 0)
1988 return NULL;
1989 } else {
1990 PyObject* list = PyList_New(2);
1991 if (!list)
1992 return NULL;
1993 PyList_SET_ITEM(list, 0, self->data);
1994 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1995 self->data = list;
1996 }
1997 }
1998
1999 Py_RETURN_NONE;
2000}
2001
2002LOCAL(PyObject*)
2003treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2004{
2005 PyObject* item;
2006
2007 if (self->data) {
2008 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002009 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002010 self->last->text = JOIN_SET(
2011 self->data, PyList_CheckExact(self->data)
2012 );
2013 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002014 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002015 self->last->tail = JOIN_SET(
2016 self->data, PyList_CheckExact(self->data)
2017 );
2018 }
2019 self->data = NULL;
2020 }
2021
2022 if (self->index == 0) {
2023 PyErr_SetString(
2024 PyExc_IndexError,
2025 "pop from empty stack"
2026 );
2027 return NULL;
2028 }
2029
2030 self->index--;
2031
2032 item = PyList_GET_ITEM(self->stack, self->index);
2033 Py_INCREF(item);
2034
2035 Py_DECREF(self->last);
2036
2037 self->last = (ElementObject*) self->this;
2038 self->this = (ElementObject*) item;
2039
2040 if (self->end_event_obj) {
2041 PyObject* res;
2042 PyObject* action = self->end_event_obj;
2043 PyObject* node = (PyObject*) self->last;
2044 res = PyTuple_New(2);
2045 if (res) {
2046 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
2047 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
2048 PyList_Append(self->events, res);
2049 Py_DECREF(res);
2050 } else
2051 PyErr_Clear(); /* FIXME: propagate error */
2052 }
2053
2054 Py_INCREF(self->last);
2055 return (PyObject*) self->last;
2056}
2057
2058LOCAL(void)
2059treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002060 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002061{
2062 PyObject* res;
2063 PyObject* action;
2064 PyObject* parcel;
2065
2066 if (!self->events)
2067 return;
2068
2069 if (start) {
2070 if (!self->start_ns_event_obj)
2071 return;
2072 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002073 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002074 if (!parcel)
2075 return;
2076 Py_INCREF(action);
2077 } else {
2078 if (!self->end_ns_event_obj)
2079 return;
2080 action = self->end_ns_event_obj;
2081 Py_INCREF(action);
2082 parcel = Py_None;
2083 Py_INCREF(parcel);
2084 }
2085
2086 res = PyTuple_New(2);
2087
2088 if (res) {
2089 PyTuple_SET_ITEM(res, 0, action);
2090 PyTuple_SET_ITEM(res, 1, parcel);
2091 PyList_Append(self->events, res);
2092 Py_DECREF(res);
2093 } else
2094 PyErr_Clear(); /* FIXME: propagate error */
2095}
2096
2097/* -------------------------------------------------------------------- */
2098/* methods (in alphabetical order) */
2099
2100static PyObject*
2101treebuilder_data(TreeBuilderObject* self, PyObject* args)
2102{
2103 PyObject* data;
2104 if (!PyArg_ParseTuple(args, "O:data", &data))
2105 return NULL;
2106
2107 return treebuilder_handle_data(self, data);
2108}
2109
2110static PyObject*
2111treebuilder_end(TreeBuilderObject* self, PyObject* args)
2112{
2113 PyObject* tag;
2114 if (!PyArg_ParseTuple(args, "O:end", &tag))
2115 return NULL;
2116
2117 return treebuilder_handle_end(self, tag);
2118}
2119
2120LOCAL(PyObject*)
2121treebuilder_done(TreeBuilderObject* self)
2122{
2123 PyObject* res;
2124
2125 /* FIXME: check stack size? */
2126
2127 if (self->root)
2128 res = self->root;
2129 else
2130 res = Py_None;
2131
2132 Py_INCREF(res);
2133 return res;
2134}
2135
2136static PyObject*
2137treebuilder_close(TreeBuilderObject* self, PyObject* args)
2138{
2139 if (!PyArg_ParseTuple(args, ":close"))
2140 return NULL;
2141
2142 return treebuilder_done(self);
2143}
2144
2145static PyObject*
2146treebuilder_start(TreeBuilderObject* self, PyObject* args)
2147{
2148 PyObject* tag;
2149 PyObject* attrib = Py_None;
2150 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2151 return NULL;
2152
2153 return treebuilder_handle_start(self, tag, attrib);
2154}
2155
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002156static PyMethodDef treebuilder_methods[] = {
2157 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2158 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2159 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002160 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2161 {NULL, NULL}
2162};
2163
Neal Norwitz227b5332006-03-22 09:28:35 +00002164static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002165 PyVarObject_HEAD_INIT(NULL, 0)
2166 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002167 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002168 (destructor)treebuilder_dealloc, /* tp_dealloc */
2169 0, /* tp_print */
2170 0, /* tp_getattr */
2171 0, /* tp_setattr */
2172 0, /* tp_reserved */
2173 0, /* tp_repr */
2174 0, /* tp_as_number */
2175 0, /* tp_as_sequence */
2176 0, /* tp_as_mapping */
2177 0, /* tp_hash */
2178 0, /* tp_call */
2179 0, /* tp_str */
2180 0, /* tp_getattro */
2181 0, /* tp_setattro */
2182 0, /* tp_as_buffer */
2183 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
2184 0, /* tp_doc */
2185 0, /* tp_traverse */
2186 0, /* tp_clear */
2187 0, /* tp_richcompare */
2188 0, /* tp_weaklistoffset */
2189 0, /* tp_iter */
2190 0, /* tp_iternext */
2191 treebuilder_methods, /* tp_methods */
2192 0, /* tp_members */
2193 0, /* tp_getset */
2194 0, /* tp_base */
2195 0, /* tp_dict */
2196 0, /* tp_descr_get */
2197 0, /* tp_descr_set */
2198 0, /* tp_dictoffset */
2199 (initproc)treebuilder_init, /* tp_init */
2200 PyType_GenericAlloc, /* tp_alloc */
2201 treebuilder_new, /* tp_new */
2202 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002203};
2204
2205/* ==================================================================== */
2206/* the expat interface */
2207
2208#if defined(USE_EXPAT)
2209
2210#include "expat.h"
2211
/* EXPAT(func) names an expat entry point: a member of the pyexpat
   dispatch table when USE_PYEXPAT_CAPI is defined, the XML_func symbol
   itself otherwise. */
#ifdef USE_PYEXPAT_CAPI
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
2219
2220typedef struct {
2221 PyObject_HEAD
2222
2223 XML_Parser parser;
2224
2225 PyObject* target;
2226 PyObject* entity;
2227
2228 PyObject* names;
2229
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002230 PyObject* handle_start;
2231 PyObject* handle_data;
2232 PyObject* handle_end;
2233
2234 PyObject* handle_comment;
2235 PyObject* handle_pi;
2236
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002237 PyObject* handle_close;
2238
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002239} XMLParserObject;
2240
Neal Norwitz227b5332006-03-22 09:28:35 +00002241static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002242
2243/* helpers */
2244
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002245LOCAL(PyObject*)
2246makeuniversal(XMLParserObject* self, const char* string)
2247{
2248 /* convert a UTF-8 tag/attribute name from the expat parser
2249 to a universal name string */
2250
2251 int size = strlen(string);
2252 PyObject* key;
2253 PyObject* value;
2254
2255 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002256 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002257 if (!key)
2258 return NULL;
2259
2260 value = PyDict_GetItem(self->names, key);
2261
2262 if (value) {
2263 Py_INCREF(value);
2264 } else {
2265 /* new name. convert to universal name, and decode as
2266 necessary */
2267
2268 PyObject* tag;
2269 char* p;
2270 int i;
2271
2272 /* look for namespace separator */
2273 for (i = 0; i < size; i++)
2274 if (string[i] == '}')
2275 break;
2276 if (i != size) {
2277 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002278 tag = PyBytes_FromStringAndSize(NULL, size+1);
2279 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002280 p[0] = '{';
2281 memcpy(p+1, string, size);
2282 size++;
2283 } else {
2284 /* plain name; use key as tag */
2285 Py_INCREF(key);
2286 tag = key;
2287 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002288
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002289 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002290 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002291 value = PyUnicode_DecodeUTF8(p, size, "strict");
2292 Py_DECREF(tag);
2293 if (!value) {
2294 Py_DECREF(key);
2295 return NULL;
2296 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002297
2298 /* add to names dictionary */
2299 if (PyDict_SetItem(self->names, key, value) < 0) {
2300 Py_DECREF(key);
2301 Py_DECREF(value);
2302 return NULL;
2303 }
2304 }
2305
2306 Py_DECREF(key);
2307 return value;
2308}
2309
Eli Bendersky5b77d812012-03-16 08:20:05 +02002310/* Set the ParseError exception with the given parameters.
2311 * If message is not NULL, it's used as the error string. Otherwise, the
2312 * message string is the default for the given error_code.
2313*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002314static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002315expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002316{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002317 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002318
Victor Stinner499dfcf2011-03-21 13:26:24 +01002319 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002320 message ? message : EXPAT(ErrorString)(error_code),
2321 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002322 if (errmsg == NULL)
2323 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002324
Victor Stinner499dfcf2011-03-21 13:26:24 +01002325 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2326 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002327 if (!error)
2328 return;
2329
Eli Bendersky5b77d812012-03-16 08:20:05 +02002330 /* Add code and position attributes */
2331 code = PyLong_FromLong((long)error_code);
2332 if (!code) {
2333 Py_DECREF(error);
2334 return;
2335 }
2336 if (PyObject_SetAttrString(error, "code", code) == -1) {
2337 Py_DECREF(error);
2338 Py_DECREF(code);
2339 return;
2340 }
2341 Py_DECREF(code);
2342
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002343 position = Py_BuildValue("(ii)", line, column);
2344 if (!position) {
2345 Py_DECREF(error);
2346 return;
2347 }
2348 if (PyObject_SetAttrString(error, "position", position) == -1) {
2349 Py_DECREF(error);
2350 Py_DECREF(position);
2351 return;
2352 }
2353 Py_DECREF(position);
2354
2355 PyErr_SetObject(elementtree_parseerror_obj, error);
2356 Py_DECREF(error);
2357}
2358
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002359/* -------------------------------------------------------------------- */
2360/* handlers */
2361
2362static void
2363expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2364 int data_len)
2365{
2366 PyObject* key;
2367 PyObject* value;
2368 PyObject* res;
2369
2370 if (data_len < 2 || data_in[0] != '&')
2371 return;
2372
Neal Norwitz0269b912007-08-08 06:56:02 +00002373 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002374 if (!key)
2375 return;
2376
2377 value = PyDict_GetItem(self->entity, key);
2378
2379 if (value) {
2380 if (TreeBuilder_CheckExact(self->target))
2381 res = treebuilder_handle_data(
2382 (TreeBuilderObject*) self->target, value
2383 );
2384 else if (self->handle_data)
2385 res = PyObject_CallFunction(self->handle_data, "O", value);
2386 else
2387 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002388 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002389 } else if (!PyErr_Occurred()) {
2390 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002391 char message[128] = "undefined entity ";
2392 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002393 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002394 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002395 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002396 EXPAT(GetErrorColumnNumber)(self->parser),
2397 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002398 );
2399 }
2400
2401 Py_DECREF(key);
2402}
2403
2404static void
2405expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2406 const XML_Char **attrib_in)
2407{
2408 PyObject* res;
2409 PyObject* tag;
2410 PyObject* attrib;
2411 int ok;
2412
2413 /* tag name */
2414 tag = makeuniversal(self, tag_in);
2415 if (!tag)
2416 return; /* parser will look for errors */
2417
2418 /* attributes */
2419 if (attrib_in[0]) {
2420 attrib = PyDict_New();
2421 if (!attrib)
2422 return;
2423 while (attrib_in[0] && attrib_in[1]) {
2424 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002425 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002426 if (!key || !value) {
2427 Py_XDECREF(value);
2428 Py_XDECREF(key);
2429 Py_DECREF(attrib);
2430 return;
2431 }
2432 ok = PyDict_SetItem(attrib, key, value);
2433 Py_DECREF(value);
2434 Py_DECREF(key);
2435 if (ok < 0) {
2436 Py_DECREF(attrib);
2437 return;
2438 }
2439 attrib_in += 2;
2440 }
2441 } else {
2442 Py_INCREF(Py_None);
2443 attrib = Py_None;
2444 }
2445
2446 if (TreeBuilder_CheckExact(self->target))
2447 /* shortcut */
2448 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2449 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002450 else if (self->handle_start) {
2451 if (attrib == Py_None) {
2452 Py_DECREF(attrib);
2453 attrib = PyDict_New();
2454 if (!attrib)
2455 return;
2456 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002457 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002458 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002459 res = NULL;
2460
2461 Py_DECREF(tag);
2462 Py_DECREF(attrib);
2463
2464 Py_XDECREF(res);
2465}
2466
2467static void
2468expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2469 int data_len)
2470{
2471 PyObject* data;
2472 PyObject* res;
2473
Neal Norwitz0269b912007-08-08 06:56:02 +00002474 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002475 if (!data)
2476 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002477
2478 if (TreeBuilder_CheckExact(self->target))
2479 /* shortcut */
2480 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2481 else if (self->handle_data)
2482 res = PyObject_CallFunction(self->handle_data, "O", data);
2483 else
2484 res = NULL;
2485
2486 Py_DECREF(data);
2487
2488 Py_XDECREF(res);
2489}
2490
2491static void
2492expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2493{
2494 PyObject* tag;
2495 PyObject* res = NULL;
2496
2497 if (TreeBuilder_CheckExact(self->target))
2498 /* shortcut */
2499 /* the standard tree builder doesn't look at the end tag */
2500 res = treebuilder_handle_end(
2501 (TreeBuilderObject*) self->target, Py_None
2502 );
2503 else if (self->handle_end) {
2504 tag = makeuniversal(self, tag_in);
2505 if (tag) {
2506 res = PyObject_CallFunction(self->handle_end, "O", tag);
2507 Py_DECREF(tag);
2508 }
2509 }
2510
2511 Py_XDECREF(res);
2512}
2513
2514static void
2515expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2516 const XML_Char *uri)
2517{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002518 PyObject* sprefix = NULL;
2519 PyObject* suri = NULL;
2520
2521 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2522 if (!suri)
2523 return;
2524
2525 if (prefix)
2526 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2527 else
2528 sprefix = PyUnicode_FromString("");
2529 if (!sprefix) {
2530 Py_DECREF(suri);
2531 return;
2532 }
2533
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002534 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002535 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002536 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002537
2538 Py_DECREF(sprefix);
2539 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002540}
2541
2542static void
2543expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2544{
2545 treebuilder_handle_namespace(
2546 (TreeBuilderObject*) self->target, 0, NULL, NULL
2547 );
2548}
2549
2550static void
2551expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2552{
2553 PyObject* comment;
2554 PyObject* res;
2555
2556 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002557 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002558 if (comment) {
2559 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2560 Py_XDECREF(res);
2561 Py_DECREF(comment);
2562 }
2563 }
2564}
2565
2566static void
2567expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2568 const XML_Char* data_in)
2569{
2570 PyObject* target;
2571 PyObject* data;
2572 PyObject* res;
2573
2574 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002575 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2576 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002577 if (target && data) {
2578 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2579 Py_XDECREF(res);
2580 Py_DECREF(data);
2581 Py_DECREF(target);
2582 } else {
2583 Py_XDECREF(data);
2584 Py_XDECREF(target);
2585 }
2586 }
2587}
2588
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002589static int
2590expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2591 XML_Encoding *info)
2592{
2593 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002594 unsigned char s[256];
2595 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002596 void *data;
2597 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002598
2599 memset(info, 0, sizeof(XML_Encoding));
2600
2601 for (i = 0; i < 256; i++)
2602 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002603
Fredrik Lundhc3389992005-12-25 11:40:19 +00002604 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002605 if (!u)
2606 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002607 if (PyUnicode_READY(u))
2608 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002609
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002610 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002611 Py_DECREF(u);
2612 return XML_STATUS_ERROR;
2613 }
2614
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002615 kind = PyUnicode_KIND(u);
2616 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002617 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002618 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2619 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2620 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002621 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002622 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002623 }
2624
2625 Py_DECREF(u);
2626
2627 return XML_STATUS_OK;
2628}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002629
2630/* -------------------------------------------------------------------- */
2631/* constructor and destructor */
2632
2633static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002634xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002635{
2636 XMLParserObject* self;
2637 /* FIXME: does this need to be static? */
2638 static XML_Memory_Handling_Suite memory_handler;
2639
2640 PyObject* target = NULL;
2641 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002642 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002643 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2644 &target, &encoding))
2645 return NULL;
2646
2647#if defined(USE_PYEXPAT_CAPI)
2648 if (!expat_capi) {
2649 PyErr_SetString(
2650 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2651 );
2652 return NULL;
2653 }
2654#endif
2655
2656 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2657 if (self == NULL)
2658 return NULL;
2659
2660 self->entity = PyDict_New();
2661 if (!self->entity) {
2662 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002663 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002664 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002665
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002666 self->names = PyDict_New();
2667 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002668 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002669 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002670 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002671 }
2672
2673 memory_handler.malloc_fcn = PyObject_Malloc;
2674 memory_handler.realloc_fcn = PyObject_Realloc;
2675 memory_handler.free_fcn = PyObject_Free;
2676
2677 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2678 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002679 PyObject_Del(self->names);
2680 PyObject_Del(self->entity);
2681 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002682 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002683 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002684 }
2685
2686 /* setup target handlers */
2687 if (!target) {
Eli Bendersky58d548d2012-05-29 15:45:16 +03002688 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002689 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002690 EXPAT(ParserFree)(self->parser);
2691 PyObject_Del(self->names);
2692 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002693 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002694 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002695 }
2696 } else
2697 Py_INCREF(target);
2698 self->target = target;
2699
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002700 self->handle_start = PyObject_GetAttrString(target, "start");
2701 self->handle_data = PyObject_GetAttrString(target, "data");
2702 self->handle_end = PyObject_GetAttrString(target, "end");
2703 self->handle_comment = PyObject_GetAttrString(target, "comment");
2704 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002705 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706
2707 PyErr_Clear();
2708
2709 /* configure parser */
2710 EXPAT(SetUserData)(self->parser, self);
2711 EXPAT(SetElementHandler)(
2712 self->parser,
2713 (XML_StartElementHandler) expat_start_handler,
2714 (XML_EndElementHandler) expat_end_handler
2715 );
2716 EXPAT(SetDefaultHandlerExpand)(
2717 self->parser,
2718 (XML_DefaultHandler) expat_default_handler
2719 );
2720 EXPAT(SetCharacterDataHandler)(
2721 self->parser,
2722 (XML_CharacterDataHandler) expat_data_handler
2723 );
2724 if (self->handle_comment)
2725 EXPAT(SetCommentHandler)(
2726 self->parser,
2727 (XML_CommentHandler) expat_comment_handler
2728 );
2729 if (self->handle_pi)
2730 EXPAT(SetProcessingInstructionHandler)(
2731 self->parser,
2732 (XML_ProcessingInstructionHandler) expat_pi_handler
2733 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734 EXPAT(SetUnknownEncodingHandler)(
2735 self->parser,
2736 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2737 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738
2739 ALLOC(sizeof(XMLParserObject), "create expatparser");
2740
2741 return (PyObject*) self;
2742}
2743
2744static void
2745xmlparser_dealloc(XMLParserObject* self)
2746{
2747 EXPAT(ParserFree)(self->parser);
2748
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002749 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002750 Py_XDECREF(self->handle_pi);
2751 Py_XDECREF(self->handle_comment);
2752 Py_XDECREF(self->handle_end);
2753 Py_XDECREF(self->handle_data);
2754 Py_XDECREF(self->handle_start);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002755
2756 Py_DECREF(self->target);
2757 Py_DECREF(self->entity);
2758 Py_DECREF(self->names);
2759
2760 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2761
2762 PyObject_Del(self);
2763}
2764
2765/* -------------------------------------------------------------------- */
2766/* methods (in alphabetical order) */
2767
2768LOCAL(PyObject*)
2769expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2770{
2771 int ok;
2772
2773 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2774
2775 if (PyErr_Occurred())
2776 return NULL;
2777
2778 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002779 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002780 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002781 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002782 EXPAT(GetErrorColumnNumber)(self->parser),
2783 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002784 );
2785 return NULL;
2786 }
2787
2788 Py_RETURN_NONE;
2789}
2790
2791static PyObject*
2792xmlparser_close(XMLParserObject* self, PyObject* args)
2793{
2794 /* end feeding data to parser */
2795
2796 PyObject* res;
2797 if (!PyArg_ParseTuple(args, ":close"))
2798 return NULL;
2799
2800 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002801 if (!res)
2802 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002803
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002804 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002805 Py_DECREF(res);
2806 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002807 } if (self->handle_close) {
2808 Py_DECREF(res);
2809 return PyObject_CallFunction(self->handle_close, "");
2810 } else
2811 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002812}
2813
2814static PyObject*
2815xmlparser_feed(XMLParserObject* self, PyObject* args)
2816{
2817 /* feed data to parser */
2818
2819 char* data;
2820 int data_len;
2821 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2822 return NULL;
2823
2824 return expat_parse(self, data, data_len, 0);
2825}
2826
2827static PyObject*
2828xmlparser_parse(XMLParserObject* self, PyObject* args)
2829{
2830 /* (internal) parse until end of input stream */
2831
2832 PyObject* reader;
2833 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02002834 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002835 PyObject* res;
2836
2837 PyObject* fileobj;
2838 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2839 return NULL;
2840
2841 reader = PyObject_GetAttrString(fileobj, "read");
2842 if (!reader)
2843 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002844
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002845 /* read from open file object */
2846 for (;;) {
2847
2848 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2849
2850 if (!buffer) {
2851 /* read failed (e.g. due to KeyboardInterrupt) */
2852 Py_DECREF(reader);
2853 return NULL;
2854 }
2855
Eli Benderskyf996e772012-03-16 05:53:30 +02002856 if (PyUnicode_CheckExact(buffer)) {
2857 /* A unicode object is encoded into bytes using UTF-8 */
2858 if (PyUnicode_GET_SIZE(buffer) == 0) {
2859 Py_DECREF(buffer);
2860 break;
2861 }
2862 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
2863 if (!temp) {
2864 /* Propagate exception from PyUnicode_AsEncodedString */
2865 Py_DECREF(buffer);
2866 Py_DECREF(reader);
2867 return NULL;
2868 }
2869
2870 /* Here we no longer need the original buffer since it contains
2871 * unicode. Make it point to the encoded bytes object.
2872 */
2873 Py_DECREF(buffer);
2874 buffer = temp;
2875 }
2876 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002877 Py_DECREF(buffer);
2878 break;
2879 }
2880
2881 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002882 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002883 );
2884
2885 Py_DECREF(buffer);
2886
2887 if (!res) {
2888 Py_DECREF(reader);
2889 return NULL;
2890 }
2891 Py_DECREF(res);
2892
2893 }
2894
2895 Py_DECREF(reader);
2896
2897 res = expat_parse(self, "", 0, 1);
2898
2899 if (res && TreeBuilder_CheckExact(self->target)) {
2900 Py_DECREF(res);
2901 return treebuilder_done((TreeBuilderObject*) self->target);
2902 }
2903
2904 return res;
2905}
2906
2907static PyObject*
2908xmlparser_setevents(XMLParserObject* self, PyObject* args)
2909{
2910 /* activate element event reporting */
2911
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002912 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002913 TreeBuilderObject* target;
2914
2915 PyObject* events; /* event collector */
2916 PyObject* event_set = Py_None;
2917 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2918 &event_set))
2919 return NULL;
2920
2921 if (!TreeBuilder_CheckExact(self->target)) {
2922 PyErr_SetString(
2923 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01002924 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002925 "targets"
2926 );
2927 return NULL;
2928 }
2929
2930 target = (TreeBuilderObject*) self->target;
2931
2932 Py_INCREF(events);
2933 Py_XDECREF(target->events);
2934 target->events = events;
2935
2936 /* clear out existing events */
2937 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2938 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2939 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2940 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2941
2942 if (event_set == Py_None) {
2943 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002944 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002945 Py_RETURN_NONE;
2946 }
2947
2948 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2949 goto error;
2950
2951 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2952 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2953 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002954 if (PyUnicode_Check(item)) {
2955 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002956 if (event == NULL)
2957 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002958 } else if (PyBytes_Check(item))
2959 event = PyBytes_AS_STRING(item);
2960 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002961 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002962 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002963 if (strcmp(event, "start") == 0) {
2964 Py_INCREF(item);
2965 target->start_event_obj = item;
2966 } else if (strcmp(event, "end") == 0) {
2967 Py_INCREF(item);
2968 Py_XDECREF(target->end_event_obj);
2969 target->end_event_obj = item;
2970 } else if (strcmp(event, "start-ns") == 0) {
2971 Py_INCREF(item);
2972 Py_XDECREF(target->start_ns_event_obj);
2973 target->start_ns_event_obj = item;
2974 EXPAT(SetNamespaceDeclHandler)(
2975 self->parser,
2976 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2977 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2978 );
2979 } else if (strcmp(event, "end-ns") == 0) {
2980 Py_INCREF(item);
2981 Py_XDECREF(target->end_ns_event_obj);
2982 target->end_ns_event_obj = item;
2983 EXPAT(SetNamespaceDeclHandler)(
2984 self->parser,
2985 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2986 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2987 );
2988 } else {
2989 PyErr_Format(
2990 PyExc_ValueError,
2991 "unknown event '%s'", event
2992 );
2993 return NULL;
2994 }
2995 }
2996
2997 Py_RETURN_NONE;
2998
2999 error:
3000 PyErr_SetString(
3001 PyExc_TypeError,
3002 "invalid event tuple"
3003 );
3004 return NULL;
3005}
3006
3007static PyMethodDef xmlparser_methods[] = {
3008 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
3009 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3010 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3011 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
3012 {NULL, NULL}
3013};
3014
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003015static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003016xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003017{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003018 if (PyUnicode_Check(nameobj)) {
3019 PyObject* res;
3020 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3021 res = self->entity;
3022 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3023 res = self->target;
3024 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3025 return PyUnicode_FromFormat(
3026 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003027 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003028 }
3029 else
3030 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003031
Alexander Belopolskye239d232010-12-08 23:31:48 +00003032 Py_INCREF(res);
3033 return res;
3034 }
3035 generic:
3036 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003037}
3038
Neal Norwitz227b5332006-03-22 09:28:35 +00003039static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003040 PyVarObject_HEAD_INIT(NULL, 0)
3041 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003042 /* methods */
3043 (destructor)xmlparser_dealloc, /* tp_dealloc */
3044 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003045 0, /* tp_getattr */
3046 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00003047 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003048 0, /* tp_repr */
3049 0, /* tp_as_number */
3050 0, /* tp_as_sequence */
3051 0, /* tp_as_mapping */
3052 0, /* tp_hash */
3053 0, /* tp_call */
3054 0, /* tp_str */
3055 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3056 0, /* tp_setattro */
3057 0, /* tp_as_buffer */
3058 Py_TPFLAGS_DEFAULT, /* tp_flags */
3059 0, /* tp_doc */
3060 0, /* tp_traverse */
3061 0, /* tp_clear */
3062 0, /* tp_richcompare */
3063 0, /* tp_weaklistoffset */
3064 0, /* tp_iter */
3065 0, /* tp_iternext */
3066 xmlparser_methods, /* tp_methods */
3067 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003068};
3069
3070#endif
3071
3072/* ==================================================================== */
3073/* python module interface */
3074
3075static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003077#if defined(USE_EXPAT)
3078 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079#endif
3080 {NULL, NULL}
3081};
3082
Martin v. Löwis1a214512008-06-11 05:26:20 +00003083
3084static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003085 PyModuleDef_HEAD_INIT,
3086 "_elementtree",
3087 NULL,
3088 -1,
3089 _functions,
3090 NULL,
3091 NULL,
3092 NULL,
3093 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003094};
3095
Neal Norwitzf6657e62006-12-28 04:47:50 +00003096PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003097PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003098{
Eli Bendersky828efde2012-04-05 05:40:58 +03003099 PyObject *m, *g, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003100 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003101
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003102 /* Initialize object types */
3103 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003104 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003105 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003106 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003107#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003108 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003109 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003110#endif
3111
Martin v. Löwis1a214512008-06-11 05:26:20 +00003112 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003113 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003114 return NULL;
3115
3116 /* The code below requires that the module gets already added
3117 to sys.modules. */
3118 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003119 _elementtreemodule.m_name,
3120 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003121
3122 /* python glue code */
3123
3124 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003125 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003126 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003127
3128 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
3129
3130 bootstrap = (
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003131 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003132 " if tag == '*':\n"
3133 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003134 " if tag is None or node.tag == tag:\n"
3135 " yield node\n"
3136 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003137 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003138 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003139
3140 "def itertext(node):\n" /* helper */
3141 " if node.text:\n"
3142 " yield node.text\n"
3143 " for e in node:\n"
3144 " for s in e.itertext():\n"
3145 " yield s\n"
3146 " if e.tail:\n"
3147 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003148
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003149 );
3150
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003151 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3152 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003153
Eli Bendersky828efde2012-04-05 05:40:58 +03003154 if (!(temp = PyImport_ImportModule("copy")))
3155 return NULL;
3156 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3157 Py_XDECREF(temp);
3158
3159 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3160 return NULL;
3161
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003162 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3163 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003164
3165#if defined(USE_PYEXPAT_CAPI)
3166 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003167 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3168 if (expat_capi) {
3169 /* check that it's usable */
3170 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3171 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3172 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3173 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3174 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3175 expat_capi = NULL;
3176 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003177#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003178
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003179 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003180 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003181 );
3182 Py_INCREF(elementtree_parseerror_obj);
3183 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3184
Eli Bendersky092af1f2012-03-04 07:14:03 +02003185 Py_INCREF((PyObject *)&Element_Type);
3186 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3187
Eli Bendersky58d548d2012-05-29 15:45:16 +03003188 Py_INCREF((PyObject *)&TreeBuilder_Type);
3189 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3190
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003191 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003192}