blob: d74b4972f559d94554a88a3a9cc521a882ab06aa [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000053#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054
55/* -------------------------------------------------------------------- */
56/* configuration */
57
58/* Leave defined to include the expat-based XMLParser type */
59#define USE_EXPAT
60
Florent Xiclunaf15351d2010-03-13 23:24:31 +000061/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000062/* #define USE_PYEXPAT_CAPI */
63
64/* An element can hold this many children without extra memory
65 allocations. */
66#define STATIC_CHILDREN 4
67
/* For best performance, choose a value so that 80-90% of all nodes
69 have no more than the given number of children. Set this to zero
70 to minimize the size of the element structure itself (this only
71 helps if you have lots of leaf nodes with attributes). */
72
73/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010074 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000075 that the number of children should be an even number, at least on
76 32-bit platforms. */
77
78/* -------------------------------------------------------------------- */
79
80#if 0
81static int memory = 0;
82#define ALLOC(size, comment)\
83do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
84#define RELEASE(size, comment)\
85do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
86#else
87#define ALLOC(size, comment)
88#define RELEASE(size, comment)
89#endif
90
91/* compiler tweaks */
92#if defined(_MSC_VER)
93#define LOCAL(type) static __inline type __fastcall
94#else
95#define LOCAL(type) static type
96#endif
97
/* macros used to store 'join' flags in string object pointers.  note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ.  see comments in the ElementObject definition for more
   info.

   The flag lives in the least significant bit of the pointer value:
     JOIN_GET(p)        yields that bit (0 or 1).
     JOIN_SET(p, flag)  strips any existing flag from p and ORs in `flag`,
                        yielding a tagged void* value.
     JOIN_OBJ(p)        masks the bit off and yields a usable PyObject*. */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
105
106/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000107static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000108static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000109static PyObject* elementtree_iter_obj;
110static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000111static PyObject* elementpath_obj;
112
113/* helpers */
114
115LOCAL(PyObject*)
116deepcopy(PyObject* object, PyObject* memo)
117{
118 /* do a deep copy of the given object */
119
120 PyObject* args;
121 PyObject* result;
122
123 if (!elementtree_deepcopy_obj) {
124 PyErr_SetString(
125 PyExc_RuntimeError,
126 "deepcopy helper not found"
127 );
128 return NULL;
129 }
130
131 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000132 if (!args)
133 return NULL;
134
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
136 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
137
138 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
139
140 Py_DECREF(args);
141
142 return result;
143}
144
145LOCAL(PyObject*)
146list_join(PyObject* list)
147{
148 /* join list elements (destroying the list in the process) */
149
150 PyObject* joiner;
151 PyObject* function;
152 PyObject* args;
153 PyObject* result;
154
155 switch (PyList_GET_SIZE(list)) {
156 case 0:
157 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000158 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159 case 1:
160 result = PyList_GET_ITEM(list, 0);
161 Py_INCREF(result);
162 Py_DECREF(list);
163 return result;
164 }
165
166 /* two or more elements: slice out a suitable separator from the
167 first member, and use that to join the entire list */
168
169 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
170 if (!joiner)
171 return NULL;
172
173 function = PyObject_GetAttrString(joiner, "join");
174 if (!function) {
175 Py_DECREF(joiner);
176 return NULL;
177 }
178
179 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000180 if (!args)
181 return NULL;
182
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183 PyTuple_SET_ITEM(args, 0, list);
184
185 result = PyObject_CallObject(function, args);
186
187 Py_DECREF(args); /* also removes list */
188 Py_DECREF(function);
189 Py_DECREF(joiner);
190
191 return result;
192}
193
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000194/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200195/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000196
/* Optional part of an element: the attribute dictionary and the child
   array.  It is allocated separately (see create_extra) and reached via
   ElementObject.extra, which is NULL while the element has neither. */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    int length; /* actual number of items */
    int allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline storage used until more than STATIC_CHILDREN slots are needed */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
212
/* The Element object itself.  Attributes and children live in the
   separately allocated `extra` section. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children; NULL until create_extra has been called */
    ElementObjectExtra* extra;

    PyObject *weakreflist; /* For tp_weaklistoffset */

} ElementObject;
236
Neal Norwitz227b5332006-03-22 09:28:35 +0000237static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000238
Christian Heimes90aa7642007-12-19 02:45:37 +0000239#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000240
241/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200242/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000243
244LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200245create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246{
247 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
248 if (!self->extra)
249 return -1;
250
251 if (!attrib)
252 attrib = Py_None;
253
254 Py_INCREF(attrib);
255 self->extra->attrib = attrib;
256
257 self->extra->length = 0;
258 self->extra->allocated = STATIC_CHILDREN;
259 self->extra->children = self->extra->_children;
260
261 return 0;
262}
263
264LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200265dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266{
Eli Bendersky08b85292012-04-04 15:55:07 +0300267 ElementObjectExtra *myextra;
268 int i;
269
Eli Benderskyebf37a22012-04-03 22:02:37 +0300270 if (!self->extra)
271 return;
272
273 /* Avoid DECREFs calling into this code again (cycles, etc.)
274 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300275 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300276 self->extra = NULL;
277
278 Py_DECREF(myextra->attrib);
279
Eli Benderskyebf37a22012-04-03 22:02:37 +0300280 for (i = 0; i < myextra->length; i++)
281 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282
Eli Benderskyebf37a22012-04-03 22:02:37 +0300283 if (myextra->children != myextra->_children)
284 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285
Eli Benderskyebf37a22012-04-03 22:02:37 +0300286 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287}
288
Eli Bendersky092af1f2012-03-04 07:14:03 +0200289/* Convenience internal function to create new Element objects with the given
290 * tag and attributes.
291*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000292LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200293create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000294{
295 ElementObject* self;
296
Eli Bendersky0192ba32012-03-30 16:38:33 +0300297 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000298 if (self == NULL)
299 return NULL;
300
301 /* use None for empty dictionaries */
302 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
303 attrib = Py_None;
304
305 self->extra = NULL;
306
307 if (attrib != Py_None) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200308 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000309 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000310 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000311 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000312 }
313
314 Py_INCREF(tag);
315 self->tag = tag;
316
317 Py_INCREF(Py_None);
318 self->text = Py_None;
319
320 Py_INCREF(Py_None);
321 self->tail = Py_None;
322
Eli Benderskyebf37a22012-04-03 22:02:37 +0300323 self->weakreflist = NULL;
324
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000325 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300326 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000327 return (PyObject*) self;
328}
329
Eli Bendersky092af1f2012-03-04 07:14:03 +0200330static PyObject *
331element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
332{
333 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
334 if (e != NULL) {
335 Py_INCREF(Py_None);
336 e->tag = Py_None;
337
338 Py_INCREF(Py_None);
339 e->text = Py_None;
340
341 Py_INCREF(Py_None);
342 e->tail = Py_None;
343
344 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300345 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200346 }
347 return (PyObject *)e;
348}
349
350static int
351element_init(PyObject *self, PyObject *args, PyObject *kwds)
352{
353 PyObject *tag;
354 PyObject *tmp;
355 PyObject *attrib = NULL;
356 ElementObject *self_elem;
357
358 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
359 return -1;
360
361 if (attrib || kwds) {
362 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
363 if (!attrib)
364 return -1;
365 if (kwds)
366 PyDict_Update(attrib, kwds);
367 } else {
368 Py_INCREF(Py_None);
369 attrib = Py_None;
370 }
371
372 self_elem = (ElementObject *)self;
373
374 /* Use None for empty dictionaries */
375 if (PyDict_CheckExact(attrib) && PyDict_Size(attrib) == 0) {
376 Py_INCREF(Py_None);
377 attrib = Py_None;
378 }
379
380 if (attrib != Py_None) {
381 if (create_extra(self_elem, attrib) < 0) {
382 PyObject_Del(self_elem);
383 return -1;
384 }
385 }
386
387 /* If create_extra needed attrib, it took a reference to it, so we can
388 * release ours anyway.
389 */
390 Py_DECREF(attrib);
391
392 /* Replace the objects already pointed to by tag, text and tail. */
393 tmp = self_elem->tag;
394 self_elem->tag = tag;
395 Py_INCREF(tag);
396 Py_DECREF(tmp);
397
398 tmp = self_elem->text;
399 self_elem->text = Py_None;
400 Py_INCREF(Py_None);
401 Py_DECREF(JOIN_OBJ(tmp));
402
403 tmp = self_elem->tail;
404 self_elem->tail = Py_None;
405 Py_INCREF(Py_None);
406 Py_DECREF(JOIN_OBJ(tmp));
407
408 return 0;
409}
410
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000411LOCAL(int)
412element_resize(ElementObject* self, int extra)
413{
414 int size;
415 PyObject* *children;
416
417 /* make sure self->children can hold the given number of extra
418 elements. set an exception and return -1 if allocation failed */
419
420 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000422
423 size = self->extra->length + extra;
424
425 if (size > self->extra->allocated) {
426 /* use Python 2.4's list growth strategy */
427 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000428 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100429 * which needs at least 4 bytes.
430 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000431 * be safe.
432 */
433 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000434 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000435 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100436 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000437 * false alarm always assume at least one child to be safe.
438 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000439 children = PyObject_Realloc(self->extra->children,
440 size * sizeof(PyObject*));
441 if (!children)
442 goto nomemory;
443 } else {
444 children = PyObject_Malloc(size * sizeof(PyObject*));
445 if (!children)
446 goto nomemory;
447 /* copy existing children from static area to malloc buffer */
448 memcpy(children, self->extra->children,
449 self->extra->length * sizeof(PyObject*));
450 }
451 self->extra->children = children;
452 self->extra->allocated = size;
453 }
454
455 return 0;
456
457 nomemory:
458 PyErr_NoMemory();
459 return -1;
460}
461
462LOCAL(int)
463element_add_subelement(ElementObject* self, PyObject* element)
464{
465 /* add a child element to a parent */
466
467 if (element_resize(self, 1) < 0)
468 return -1;
469
470 Py_INCREF(element);
471 self->extra->children[self->extra->length] = element;
472
473 self->extra->length++;
474
475 return 0;
476}
477
478LOCAL(PyObject*)
479element_get_attrib(ElementObject* self)
480{
481 /* return borrowed reference to attrib dictionary */
482 /* note: this function assumes that the extra section exists */
483
484 PyObject* res = self->extra->attrib;
485
486 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000487 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000488 /* create missing dictionary */
489 res = PyDict_New();
490 if (!res)
491 return NULL;
492 self->extra->attrib = res;
493 }
494
495 return res;
496}
497
498LOCAL(PyObject*)
499element_get_text(ElementObject* self)
500{
501 /* return borrowed reference to text attribute */
502
503 PyObject* res = self->text;
504
505 if (JOIN_GET(res)) {
506 res = JOIN_OBJ(res);
507 if (PyList_CheckExact(res)) {
508 res = list_join(res);
509 if (!res)
510 return NULL;
511 self->text = res;
512 }
513 }
514
515 return res;
516}
517
518LOCAL(PyObject*)
519element_get_tail(ElementObject* self)
520{
521 /* return borrowed reference to text attribute */
522
523 PyObject* res = self->tail;
524
525 if (JOIN_GET(res)) {
526 res = JOIN_OBJ(res);
527 if (PyList_CheckExact(res)) {
528 res = list_join(res);
529 if (!res)
530 return NULL;
531 self->tail = res;
532 }
533 }
534
535 return res;
536}
537
538static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000539subelement(PyObject* self, PyObject* args, PyObject* kw)
540{
541 PyObject* elem;
542
543 ElementObject* parent;
544 PyObject* tag;
545 PyObject* attrib = NULL;
546 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
547 &Element_Type, &parent, &tag,
548 &PyDict_Type, &attrib))
549 return NULL;
550
551 if (attrib || kw) {
552 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
553 if (!attrib)
554 return NULL;
555 if (kw)
556 PyDict_Update(attrib, kw);
557 } else {
558 Py_INCREF(Py_None);
559 attrib = Py_None;
560 }
561
Eli Bendersky092af1f2012-03-04 07:14:03 +0200562 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563
564 Py_DECREF(attrib);
565
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000566 if (element_add_subelement(parent, elem) < 0) {
567 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000568 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000569 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000570
571 return elem;
572}
573
Eli Bendersky0192ba32012-03-30 16:38:33 +0300574static int
575element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
576{
577 Py_VISIT(self->tag);
578 Py_VISIT(JOIN_OBJ(self->text));
579 Py_VISIT(JOIN_OBJ(self->tail));
580
581 if (self->extra) {
582 int i;
583 Py_VISIT(self->extra->attrib);
584
585 for (i = 0; i < self->extra->length; ++i)
586 Py_VISIT(self->extra->children[i]);
587 }
588 return 0;
589}
590
591static int
592element_gc_clear(ElementObject *self)
593{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300594 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300595
596 /* The following is like Py_CLEAR for self->text and self->tail, but
597 * written explicitily because the real pointers hide behind access
598 * macros.
599 */
600 if (self->text) {
601 PyObject *tmp = JOIN_OBJ(self->text);
602 self->text = NULL;
603 Py_DECREF(tmp);
604 }
605
606 if (self->tail) {
607 PyObject *tmp = JOIN_OBJ(self->tail);
608 self->tail = NULL;
609 Py_DECREF(tmp);
610 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300611
612 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300613 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300614 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300615 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616 return 0;
617}
618
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000619static void
620element_dealloc(ElementObject* self)
621{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300623
624 if (self->weakreflist != NULL)
625 PyObject_ClearWeakRefs((PyObject *) self);
626
Eli Bendersky0192ba32012-03-30 16:38:33 +0300627 /* element_gc_clear clears all references and deallocates extra
628 */
629 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000630
631 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200632 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000633}
634
635/* -------------------------------------------------------------------- */
636/* methods (in alphabetical order) */
637
638static PyObject*
639element_append(ElementObject* self, PyObject* args)
640{
641 PyObject* element;
642 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
643 return NULL;
644
645 if (element_add_subelement(self, element) < 0)
646 return NULL;
647
648 Py_RETURN_NONE;
649}
650
651static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300652element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000653{
654 if (!PyArg_ParseTuple(args, ":clear"))
655 return NULL;
656
Eli Benderskyebf37a22012-04-03 22:02:37 +0300657 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000658
659 Py_INCREF(Py_None);
660 Py_DECREF(JOIN_OBJ(self->text));
661 self->text = Py_None;
662
663 Py_INCREF(Py_None);
664 Py_DECREF(JOIN_OBJ(self->tail));
665 self->tail = Py_None;
666
667 Py_RETURN_NONE;
668}
669
670static PyObject*
671element_copy(ElementObject* self, PyObject* args)
672{
673 int i;
674 ElementObject* element;
675
676 if (!PyArg_ParseTuple(args, ":__copy__"))
677 return NULL;
678
Eli Bendersky092af1f2012-03-04 07:14:03 +0200679 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000680 self->tag, (self->extra) ? self->extra->attrib : Py_None
681 );
682 if (!element)
683 return NULL;
684
685 Py_DECREF(JOIN_OBJ(element->text));
686 element->text = self->text;
687 Py_INCREF(JOIN_OBJ(element->text));
688
689 Py_DECREF(JOIN_OBJ(element->tail));
690 element->tail = self->tail;
691 Py_INCREF(JOIN_OBJ(element->tail));
692
693 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100694
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000695 if (element_resize(element, self->extra->length) < 0) {
696 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000698 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699
700 for (i = 0; i < self->extra->length; i++) {
701 Py_INCREF(self->extra->children[i]);
702 element->extra->children[i] = self->extra->children[i];
703 }
704
705 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100706
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000707 }
708
709 return (PyObject*) element;
710}
711
712static PyObject*
713element_deepcopy(ElementObject* self, PyObject* args)
714{
715 int i;
716 ElementObject* element;
717 PyObject* tag;
718 PyObject* attrib;
719 PyObject* text;
720 PyObject* tail;
721 PyObject* id;
722
723 PyObject* memo;
724 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
725 return NULL;
726
727 tag = deepcopy(self->tag, memo);
728 if (!tag)
729 return NULL;
730
731 if (self->extra) {
732 attrib = deepcopy(self->extra->attrib, memo);
733 if (!attrib) {
734 Py_DECREF(tag);
735 return NULL;
736 }
737 } else {
738 Py_INCREF(Py_None);
739 attrib = Py_None;
740 }
741
Eli Bendersky092af1f2012-03-04 07:14:03 +0200742 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743
744 Py_DECREF(tag);
745 Py_DECREF(attrib);
746
747 if (!element)
748 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100749
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 text = deepcopy(JOIN_OBJ(self->text), memo);
751 if (!text)
752 goto error;
753 Py_DECREF(element->text);
754 element->text = JOIN_SET(text, JOIN_GET(self->text));
755
756 tail = deepcopy(JOIN_OBJ(self->tail), memo);
757 if (!tail)
758 goto error;
759 Py_DECREF(element->tail);
760 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
761
762 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100763
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 if (element_resize(element, self->extra->length) < 0)
765 goto error;
766
767 for (i = 0; i < self->extra->length; i++) {
768 PyObject* child = deepcopy(self->extra->children[i], memo);
769 if (!child) {
770 element->extra->length = i;
771 goto error;
772 }
773 element->extra->children[i] = child;
774 }
775
776 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100777
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000778 }
779
780 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000781 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000782 if (!id)
783 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784
785 i = PyDict_SetItem(memo, id, (PyObject*) element);
786
787 Py_DECREF(id);
788
789 if (i < 0)
790 goto error;
791
792 return (PyObject*) element;
793
794 error:
795 Py_DECREF(element);
796 return NULL;
797}
798
799LOCAL(int)
800checkpath(PyObject* tag)
801{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000802 Py_ssize_t i;
803 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000804
805 /* check if a tag contains an xpath character */
806
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000807#define PATHCHAR(ch) \
808 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000809
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000810 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200811 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
812 void *data = PyUnicode_DATA(tag);
813 unsigned int kind = PyUnicode_KIND(tag);
814 for (i = 0; i < len; i++) {
815 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
816 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000817 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200818 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000819 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200820 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000821 return 1;
822 }
823 return 0;
824 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000825 if (PyBytes_Check(tag)) {
826 char *p = PyBytes_AS_STRING(tag);
827 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000828 if (p[i] == '{')
829 check = 0;
830 else if (p[i] == '}')
831 check = 1;
832 else if (check && PATHCHAR(p[i]))
833 return 1;
834 }
835 return 0;
836 }
837
838 return 1; /* unknown type; might be path expression */
839}
840
841static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000842element_extend(ElementObject* self, PyObject* args)
843{
844 PyObject* seq;
845 Py_ssize_t i, seqlen = 0;
846
847 PyObject* seq_in;
848 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
849 return NULL;
850
851 seq = PySequence_Fast(seq_in, "");
852 if (!seq) {
853 PyErr_Format(
854 PyExc_TypeError,
855 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
856 );
857 return NULL;
858 }
859
860 seqlen = PySequence_Size(seq);
861 for (i = 0; i < seqlen; i++) {
862 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +0200863 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
864 Py_DECREF(seq);
865 PyErr_Format(
866 PyExc_TypeError,
867 "expected an Element, not \"%.200s\"",
868 Py_TYPE(element)->tp_name);
869 return NULL;
870 }
871
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000872 if (element_add_subelement(self, element) < 0) {
873 Py_DECREF(seq);
874 return NULL;
875 }
876 }
877
878 Py_DECREF(seq);
879
880 Py_RETURN_NONE;
881}
882
883static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000884element_find(ElementObject* self, PyObject* args)
885{
886 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000887 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000888 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200889
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000890 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000891 return NULL;
892
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200893 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200894 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200895 return _PyObject_CallMethodId(
896 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000897 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200898 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000899
900 if (!self->extra)
901 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100902
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000903 for (i = 0; i < self->extra->length; i++) {
904 PyObject* item = self->extra->children[i];
905 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000906 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000907 Py_INCREF(item);
908 return item;
909 }
910 }
911
912 Py_RETURN_NONE;
913}
914
915static PyObject*
916element_findtext(ElementObject* self, PyObject* args)
917{
918 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000919 PyObject* tag;
920 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000921 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200922 _Py_IDENTIFIER(findtext);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200923
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000924 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000925 return NULL;
926
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000927 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200928 return _PyObject_CallMethodId(
929 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000930 );
931
932 if (!self->extra) {
933 Py_INCREF(default_value);
934 return default_value;
935 }
936
937 for (i = 0; i < self->extra->length; i++) {
938 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000939 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
940
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000941 PyObject* text = element_get_text(item);
942 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000943 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000944 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000945 return text;
946 }
947 }
948
949 Py_INCREF(default_value);
950 return default_value;
951}
952
953static PyObject*
954element_findall(ElementObject* self, PyObject* args)
955{
956 int i;
957 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000958 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000959 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200960
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000961 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000962 return NULL;
963
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200964 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200965 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200966 return _PyObject_CallMethodId(
967 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000968 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200969 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000970
971 out = PyList_New(0);
972 if (!out)
973 return NULL;
974
975 if (!self->extra)
976 return out;
977
978 for (i = 0; i < self->extra->length; i++) {
979 PyObject* item = self->extra->children[i];
980 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000981 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000982 if (PyList_Append(out, item) < 0) {
983 Py_DECREF(out);
984 return NULL;
985 }
986 }
987 }
988
989 return out;
990}
991
992static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000993element_iterfind(ElementObject* self, PyObject* args)
994{
995 PyObject* tag;
996 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200997 _Py_IDENTIFIER(iterfind);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200998
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000999 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
1000 return NULL;
1001
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001002 return _PyObject_CallMethodId(
1003 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001004 );
1005}
1006
1007static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001008element_get(ElementObject* self, PyObject* args)
1009{
1010 PyObject* value;
1011
1012 PyObject* key;
1013 PyObject* default_value = Py_None;
1014 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
1015 return NULL;
1016
1017 if (!self->extra || self->extra->attrib == Py_None)
1018 value = default_value;
1019 else {
1020 value = PyDict_GetItem(self->extra->attrib, key);
1021 if (!value)
1022 value = default_value;
1023 }
1024
1025 Py_INCREF(value);
1026 return value;
1027}
1028
1029static PyObject*
1030element_getchildren(ElementObject* self, PyObject* args)
1031{
1032 int i;
1033 PyObject* list;
1034
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001035 /* FIXME: report as deprecated? */
1036
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001037 if (!PyArg_ParseTuple(args, ":getchildren"))
1038 return NULL;
1039
1040 if (!self->extra)
1041 return PyList_New(0);
1042
1043 list = PyList_New(self->extra->length);
1044 if (!list)
1045 return NULL;
1046
1047 for (i = 0; i < self->extra->length; i++) {
1048 PyObject* item = self->extra->children[i];
1049 Py_INCREF(item);
1050 PyList_SET_ITEM(list, i, item);
1051 }
1052
1053 return list;
1054}
1055
1056static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001057element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001058{
1059 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001060
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001061 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001062 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001063 return NULL;
1064
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001065 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001066 PyErr_SetString(
1067 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001068 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001069 );
1070 return NULL;
1071 }
1072
1073 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001074 if (!args)
1075 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001076
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001077 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1078 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1079
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001080 result = PyObject_CallObject(elementtree_iter_obj, args);
1081
1082 Py_DECREF(args);
1083
1084 return result;
1085}
1086
1087
1088static PyObject*
1089element_itertext(ElementObject* self, PyObject* args)
1090{
1091 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001092
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001093 if (!PyArg_ParseTuple(args, ":itertext"))
1094 return NULL;
1095
1096 if (!elementtree_itertext_obj) {
1097 PyErr_SetString(
1098 PyExc_RuntimeError,
1099 "itertext helper not found"
1100 );
1101 return NULL;
1102 }
1103
1104 args = PyTuple_New(1);
1105 if (!args)
1106 return NULL;
1107
1108 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1109
1110 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001111
1112 Py_DECREF(args);
1113
1114 return result;
1115}
1116
1117static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001118element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001119{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001120 ElementObject* self = (ElementObject*) self_;
1121
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001122 if (!self->extra || index < 0 || index >= self->extra->length) {
1123 PyErr_SetString(
1124 PyExc_IndexError,
1125 "child index out of range"
1126 );
1127 return NULL;
1128 }
1129
1130 Py_INCREF(self->extra->children[index]);
1131 return self->extra->children[index];
1132}
1133
1134static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001135element_insert(ElementObject* self, PyObject* args)
1136{
1137 int i;
1138
1139 int index;
1140 PyObject* element;
1141 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1142 &Element_Type, &element))
1143 return NULL;
1144
1145 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001146 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001148 if (index < 0) {
1149 index += self->extra->length;
1150 if (index < 0)
1151 index = 0;
1152 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001153 if (index > self->extra->length)
1154 index = self->extra->length;
1155
1156 if (element_resize(self, 1) < 0)
1157 return NULL;
1158
1159 for (i = self->extra->length; i > index; i--)
1160 self->extra->children[i] = self->extra->children[i-1];
1161
1162 Py_INCREF(element);
1163 self->extra->children[index] = element;
1164
1165 self->extra->length++;
1166
1167 Py_RETURN_NONE;
1168}
1169
1170static PyObject*
1171element_items(ElementObject* self, PyObject* args)
1172{
1173 if (!PyArg_ParseTuple(args, ":items"))
1174 return NULL;
1175
1176 if (!self->extra || self->extra->attrib == Py_None)
1177 return PyList_New(0);
1178
1179 return PyDict_Items(self->extra->attrib);
1180}
1181
1182static PyObject*
1183element_keys(ElementObject* self, PyObject* args)
1184{
1185 if (!PyArg_ParseTuple(args, ":keys"))
1186 return NULL;
1187
1188 if (!self->extra || self->extra->attrib == Py_None)
1189 return PyList_New(0);
1190
1191 return PyDict_Keys(self->extra->attrib);
1192}
1193
Martin v. Löwis18e16552006-02-15 17:27:45 +00001194static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001195element_length(ElementObject* self)
1196{
1197 if (!self->extra)
1198 return 0;
1199
1200 return self->extra->length;
1201}
1202
1203static PyObject*
1204element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1205{
1206 PyObject* elem;
1207
1208 PyObject* tag;
1209 PyObject* attrib;
1210 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1211 return NULL;
1212
1213 attrib = PyDict_Copy(attrib);
1214 if (!attrib)
1215 return NULL;
1216
Eli Bendersky092af1f2012-03-04 07:14:03 +02001217 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001218
1219 Py_DECREF(attrib);
1220
1221 return elem;
1222}
1223
1224static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001225element_remove(ElementObject* self, PyObject* args)
1226{
1227 int i;
1228
1229 PyObject* element;
1230 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1231 return NULL;
1232
1233 if (!self->extra) {
1234 /* element has no children, so raise exception */
1235 PyErr_SetString(
1236 PyExc_ValueError,
1237 "list.remove(x): x not in list"
1238 );
1239 return NULL;
1240 }
1241
1242 for (i = 0; i < self->extra->length; i++) {
1243 if (self->extra->children[i] == element)
1244 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001245 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001246 break;
1247 }
1248
1249 if (i == self->extra->length) {
1250 /* element is not in children, so raise exception */
1251 PyErr_SetString(
1252 PyExc_ValueError,
1253 "list.remove(x): x not in list"
1254 );
1255 return NULL;
1256 }
1257
1258 Py_DECREF(self->extra->children[i]);
1259
1260 self->extra->length--;
1261
1262 for (; i < self->extra->length; i++)
1263 self->extra->children[i] = self->extra->children[i+1];
1264
1265 Py_RETURN_NONE;
1266}
1267
1268static PyObject*
1269element_repr(ElementObject* self)
1270{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001271 if (self->tag)
1272 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1273 else
1274 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001275}
1276
1277static PyObject*
1278element_set(ElementObject* self, PyObject* args)
1279{
1280 PyObject* attrib;
1281
1282 PyObject* key;
1283 PyObject* value;
1284 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1285 return NULL;
1286
1287 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001288 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001289
1290 attrib = element_get_attrib(self);
1291 if (!attrib)
1292 return NULL;
1293
1294 if (PyDict_SetItem(attrib, key, value) < 0)
1295 return NULL;
1296
1297 Py_RETURN_NONE;
1298}
1299
1300static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001301element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001302{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001303 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001304 int i;
1305 PyObject* old;
1306
1307 if (!self->extra || index < 0 || index >= self->extra->length) {
1308 PyErr_SetString(
1309 PyExc_IndexError,
1310 "child assignment index out of range");
1311 return -1;
1312 }
1313
1314 old = self->extra->children[index];
1315
1316 if (item) {
1317 Py_INCREF(item);
1318 self->extra->children[index] = item;
1319 } else {
1320 self->extra->length--;
1321 for (i = index; i < self->extra->length; i++)
1322 self->extra->children[i] = self->extra->children[i+1];
1323 }
1324
1325 Py_DECREF(old);
1326
1327 return 0;
1328}
1329
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001330static PyObject*
1331element_subscr(PyObject* self_, PyObject* item)
1332{
1333 ElementObject* self = (ElementObject*) self_;
1334
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001335 if (PyIndex_Check(item)) {
1336 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001337
1338 if (i == -1 && PyErr_Occurred()) {
1339 return NULL;
1340 }
1341 if (i < 0 && self->extra)
1342 i += self->extra->length;
1343 return element_getitem(self_, i);
1344 }
1345 else if (PySlice_Check(item)) {
1346 Py_ssize_t start, stop, step, slicelen, cur, i;
1347 PyObject* list;
1348
1349 if (!self->extra)
1350 return PyList_New(0);
1351
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001352 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001353 self->extra->length,
1354 &start, &stop, &step, &slicelen) < 0) {
1355 return NULL;
1356 }
1357
1358 if (slicelen <= 0)
1359 return PyList_New(0);
1360 else {
1361 list = PyList_New(slicelen);
1362 if (!list)
1363 return NULL;
1364
1365 for (cur = start, i = 0; i < slicelen;
1366 cur += step, i++) {
1367 PyObject* item = self->extra->children[cur];
1368 Py_INCREF(item);
1369 PyList_SET_ITEM(list, i, item);
1370 }
1371
1372 return list;
1373 }
1374 }
1375 else {
1376 PyErr_SetString(PyExc_TypeError,
1377 "element indices must be integers");
1378 return NULL;
1379 }
1380}
1381
1382static int
1383element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1384{
1385 ElementObject* self = (ElementObject*) self_;
1386
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001387 if (PyIndex_Check(item)) {
1388 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001389
1390 if (i == -1 && PyErr_Occurred()) {
1391 return -1;
1392 }
1393 if (i < 0 && self->extra)
1394 i += self->extra->length;
1395 return element_setitem(self_, i, value);
1396 }
1397 else if (PySlice_Check(item)) {
1398 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1399
1400 PyObject* recycle = NULL;
1401 PyObject* seq = NULL;
1402
1403 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001404 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001405
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001406 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001407 self->extra->length,
1408 &start, &stop, &step, &slicelen) < 0) {
1409 return -1;
1410 }
1411
Eli Bendersky865756a2012-03-09 13:38:15 +02001412 if (value == NULL) {
1413 /* Delete slice */
1414 size_t cur;
1415 Py_ssize_t i;
1416
1417 if (slicelen <= 0)
1418 return 0;
1419
1420 /* Since we're deleting, the direction of the range doesn't matter,
1421 * so for simplicity make it always ascending.
1422 */
1423 if (step < 0) {
1424 stop = start + 1;
1425 start = stop + step * (slicelen - 1) - 1;
1426 step = -step;
1427 }
1428
1429 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1430
1431 /* recycle is a list that will contain all the children
1432 * scheduled for removal.
1433 */
1434 if (!(recycle = PyList_New(slicelen))) {
1435 PyErr_NoMemory();
1436 return -1;
1437 }
1438
1439 /* This loop walks over all the children that have to be deleted,
1440 * with cur pointing at them. num_moved is the amount of children
1441 * until the next deleted child that have to be "shifted down" to
1442 * occupy the deleted's places.
1443 * Note that in the ith iteration, shifting is done i+i places down
1444 * because i children were already removed.
1445 */
1446 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1447 /* Compute how many children have to be moved, clipping at the
1448 * list end.
1449 */
1450 Py_ssize_t num_moved = step - 1;
1451 if (cur + step >= (size_t)self->extra->length) {
1452 num_moved = self->extra->length - cur - 1;
1453 }
1454
1455 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1456
1457 memmove(
1458 self->extra->children + cur - i,
1459 self->extra->children + cur + 1,
1460 num_moved * sizeof(PyObject *));
1461 }
1462
1463 /* Leftover "tail" after the last removed child */
1464 cur = start + (size_t)slicelen * step;
1465 if (cur < (size_t)self->extra->length) {
1466 memmove(
1467 self->extra->children + cur - slicelen,
1468 self->extra->children + cur,
1469 (self->extra->length - cur) * sizeof(PyObject *));
1470 }
1471
1472 self->extra->length -= slicelen;
1473
1474 /* Discard the recycle list with all the deleted sub-elements */
1475 Py_XDECREF(recycle);
1476 return 0;
1477 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001478 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001479 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001480 seq = PySequence_Fast(value, "");
1481 if (!seq) {
1482 PyErr_Format(
1483 PyExc_TypeError,
1484 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1485 );
1486 return -1;
1487 }
1488 newlen = PySequence_Size(seq);
1489 }
1490
1491 if (step != 1 && newlen != slicelen)
1492 {
1493 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001494 "attempt to assign sequence of size %zd "
1495 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001496 newlen, slicelen
1497 );
1498 return -1;
1499 }
1500
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001501 /* Resize before creating the recycle bin, to prevent refleaks. */
1502 if (newlen > slicelen) {
1503 if (element_resize(self, newlen - slicelen) < 0) {
1504 if (seq) {
1505 Py_DECREF(seq);
1506 }
1507 return -1;
1508 }
1509 }
1510
1511 if (slicelen > 0) {
1512 /* to avoid recursive calls to this method (via decref), move
1513 old items to the recycle bin here, and get rid of them when
1514 we're done modifying the element */
1515 recycle = PyList_New(slicelen);
1516 if (!recycle) {
1517 if (seq) {
1518 Py_DECREF(seq);
1519 }
1520 return -1;
1521 }
1522 for (cur = start, i = 0; i < slicelen;
1523 cur += step, i++)
1524 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1525 }
1526
1527 if (newlen < slicelen) {
1528 /* delete slice */
1529 for (i = stop; i < self->extra->length; i++)
1530 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1531 } else if (newlen > slicelen) {
1532 /* insert slice */
1533 for (i = self->extra->length-1; i >= stop; i--)
1534 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1535 }
1536
1537 /* replace the slice */
1538 for (cur = start, i = 0; i < newlen;
1539 cur += step, i++) {
1540 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1541 Py_INCREF(element);
1542 self->extra->children[cur] = element;
1543 }
1544
1545 self->extra->length += newlen - slicelen;
1546
1547 if (seq) {
1548 Py_DECREF(seq);
1549 }
1550
1551 /* discard the recycle bin, and everything in it */
1552 Py_XDECREF(recycle);
1553
1554 return 0;
1555 }
1556 else {
1557 PyErr_SetString(PyExc_TypeError,
1558 "element indices must be integers");
1559 return -1;
1560 }
1561}
1562
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001563static PyMethodDef element_methods[] = {
1564
Eli Bendersky0192ba32012-03-30 16:38:33 +03001565 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001566
1567 {"get", (PyCFunction) element_get, METH_VARARGS},
1568 {"set", (PyCFunction) element_set, METH_VARARGS},
1569
1570 {"find", (PyCFunction) element_find, METH_VARARGS},
1571 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1572 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1573
1574 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001575 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001576 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1577 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1578
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001579 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1580 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1581 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1582
1583 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001584 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1585
1586 {"items", (PyCFunction) element_items, METH_VARARGS},
1587 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1588
1589 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1590
1591 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1592 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1593
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001594 {NULL, NULL}
1595};
1596
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001597static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001598element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001599{
1600 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001601 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001602
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001603 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001604 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001605
Alexander Belopolskye239d232010-12-08 23:31:48 +00001606 if (name == NULL)
1607 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001608
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001609 /* handle common attributes first */
1610 if (strcmp(name, "tag") == 0) {
1611 res = self->tag;
1612 Py_INCREF(res);
1613 return res;
1614 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001615 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001616 Py_INCREF(res);
1617 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001618 }
1619
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001620 /* methods */
1621 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1622 if (res)
1623 return res;
1624
1625 /* less common attributes */
1626 if (strcmp(name, "tail") == 0) {
1627 PyErr_Clear();
1628 res = element_get_tail(self);
1629 } else if (strcmp(name, "attrib") == 0) {
1630 PyErr_Clear();
1631 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001632 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001633 res = element_get_attrib(self);
1634 }
1635
1636 if (!res)
1637 return NULL;
1638
1639 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001640 return res;
1641}
1642
Eli Benderskyb20df952012-05-20 06:33:29 +03001643static PyObject*
1644element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001645{
Eli Benderskyb20df952012-05-20 06:33:29 +03001646 char *name = "";
1647 if (PyUnicode_Check(nameobj))
1648 name = _PyUnicode_AsString(nameobj);
1649
1650 if (name == NULL)
1651 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001652
1653 if (strcmp(name, "tag") == 0) {
1654 Py_DECREF(self->tag);
1655 self->tag = value;
1656 Py_INCREF(self->tag);
1657 } else if (strcmp(name, "text") == 0) {
1658 Py_DECREF(JOIN_OBJ(self->text));
1659 self->text = value;
1660 Py_INCREF(self->text);
1661 } else if (strcmp(name, "tail") == 0) {
1662 Py_DECREF(JOIN_OBJ(self->tail));
1663 self->tail = value;
1664 Py_INCREF(self->tail);
1665 } else if (strcmp(name, "attrib") == 0) {
1666 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001667 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001668 Py_DECREF(self->extra->attrib);
1669 self->extra->attrib = value;
1670 Py_INCREF(self->extra->attrib);
1671 } else {
1672 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001673 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001674 }
1675
Eli Benderskyb20df952012-05-20 06:33:29 +03001676 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001677}
1678
1679static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001680 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001681 0, /* sq_concat */
1682 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001683 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001684 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001685 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001686 0,
1687};
1688
1689static PyMappingMethods element_as_mapping = {
1690 (lenfunc) element_length,
1691 (binaryfunc) element_subscr,
1692 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001693};
1694
Neal Norwitz227b5332006-03-22 09:28:35 +00001695static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001696 PyVarObject_HEAD_INIT(NULL, 0)
1697 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001698 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001699 (destructor)element_dealloc, /* tp_dealloc */
1700 0, /* tp_print */
1701 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001702 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001703 0, /* tp_reserved */
1704 (reprfunc)element_repr, /* tp_repr */
1705 0, /* tp_as_number */
1706 &element_as_sequence, /* tp_as_sequence */
1707 &element_as_mapping, /* tp_as_mapping */
1708 0, /* tp_hash */
1709 0, /* tp_call */
1710 0, /* tp_str */
1711 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001712 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001713 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001714 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1715 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001716 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001717 (traverseproc)element_gc_traverse, /* tp_traverse */
1718 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001719 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001720 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001721 0, /* tp_iter */
1722 0, /* tp_iternext */
1723 element_methods, /* tp_methods */
1724 0, /* tp_members */
1725 0, /* tp_getset */
1726 0, /* tp_base */
1727 0, /* tp_dict */
1728 0, /* tp_descr_get */
1729 0, /* tp_descr_set */
1730 0, /* tp_dictoffset */
1731 (initproc)element_init, /* tp_init */
1732 PyType_GenericAlloc, /* tp_alloc */
1733 element_new, /* tp_new */
1734 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001735};
1736
1737/* ==================================================================== */
1738/* the tree builder type */
1739
1740typedef struct {
1741 PyObject_HEAD
1742
1743 PyObject* root; /* root node (first created node) */
1744
1745 ElementObject* this; /* current node */
1746 ElementObject* last; /* most recently created node */
1747
1748 PyObject* data; /* data collector (string or list), or NULL */
1749
1750 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001751 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001752
1753 /* element tracing */
1754 PyObject* events; /* list of events, or NULL if not collecting */
1755 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1756 PyObject* end_event_obj;
1757 PyObject* start_ns_event_obj;
1758 PyObject* end_ns_event_obj;
1759
1760} TreeBuilderObject;
1761
Neal Norwitz227b5332006-03-22 09:28:35 +00001762static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001763
Christian Heimes90aa7642007-12-19 02:45:37 +00001764#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001765
1766/* -------------------------------------------------------------------- */
1767/* constructor and destructor */
1768
1769LOCAL(PyObject*)
1770treebuilder_new(void)
1771{
1772 TreeBuilderObject* self;
1773
1774 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1775 if (self == NULL)
1776 return NULL;
1777
1778 self->root = NULL;
1779
1780 Py_INCREF(Py_None);
1781 self->this = (ElementObject*) Py_None;
1782
1783 Py_INCREF(Py_None);
1784 self->last = (ElementObject*) Py_None;
1785
1786 self->data = NULL;
1787
1788 self->stack = PyList_New(20);
1789 self->index = 0;
1790
1791 self->events = NULL;
1792 self->start_event_obj = self->end_event_obj = NULL;
1793 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1794
1795 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1796
1797 return (PyObject*) self;
1798}
1799
1800static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001801treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001802{
1803 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1804 return NULL;
1805
1806 return treebuilder_new();
1807}
1808
1809static void
1810treebuilder_dealloc(TreeBuilderObject* self)
1811{
1812 Py_XDECREF(self->end_ns_event_obj);
1813 Py_XDECREF(self->start_ns_event_obj);
1814 Py_XDECREF(self->end_event_obj);
1815 Py_XDECREF(self->start_event_obj);
1816 Py_XDECREF(self->events);
1817 Py_DECREF(self->stack);
1818 Py_XDECREF(self->data);
1819 Py_DECREF(self->last);
1820 Py_DECREF(self->this);
1821 Py_XDECREF(self->root);
1822
1823 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1824
1825 PyObject_Del(self);
1826}
1827
1828/* -------------------------------------------------------------------- */
1829/* handlers */
1830
1831LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001832treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1833 PyObject* attrib)
1834{
1835 PyObject* node;
1836 PyObject* this;
1837
1838 if (self->data) {
1839 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001840 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001841 self->last->text = JOIN_SET(
1842 self->data, PyList_CheckExact(self->data)
1843 );
1844 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001845 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001846 self->last->tail = JOIN_SET(
1847 self->data, PyList_CheckExact(self->data)
1848 );
1849 }
1850 self->data = NULL;
1851 }
1852
Eli Bendersky092af1f2012-03-04 07:14:03 +02001853 node = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001854 if (!node)
1855 return NULL;
1856
1857 this = (PyObject*) self->this;
1858
1859 if (this != Py_None) {
1860 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001861 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001862 } else {
1863 if (self->root) {
1864 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001865 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001866 "multiple elements on top level"
1867 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001868 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001869 }
1870 Py_INCREF(node);
1871 self->root = node;
1872 }
1873
1874 if (self->index < PyList_GET_SIZE(self->stack)) {
1875 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001876 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001877 Py_INCREF(this);
1878 } else {
1879 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001880 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001881 }
1882 self->index++;
1883
1884 Py_DECREF(this);
1885 Py_INCREF(node);
1886 self->this = (ElementObject*) node;
1887
1888 Py_DECREF(self->last);
1889 Py_INCREF(node);
1890 self->last = (ElementObject*) node;
1891
1892 if (self->start_event_obj) {
1893 PyObject* res;
1894 PyObject* action = self->start_event_obj;
1895 res = PyTuple_New(2);
1896 if (res) {
1897 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1898 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1899 PyList_Append(self->events, res);
1900 Py_DECREF(res);
1901 } else
1902 PyErr_Clear(); /* FIXME: propagate error */
1903 }
1904
1905 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001906
1907 error:
1908 Py_DECREF(node);
1909 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001910}
1911
1912LOCAL(PyObject*)
1913treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1914{
1915 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001916 if (self->last == (ElementObject*) Py_None) {
1917 /* ignore calls to data before the first call to start */
1918 Py_RETURN_NONE;
1919 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001920 /* store the first item as is */
1921 Py_INCREF(data); self->data = data;
1922 } else {
1923 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001924 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1925 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001926 /* expat often generates single character data sections; handle
1927 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001928 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1929 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001930 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001931 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001932 } else if (PyList_CheckExact(self->data)) {
1933 if (PyList_Append(self->data, data) < 0)
1934 return NULL;
1935 } else {
1936 PyObject* list = PyList_New(2);
1937 if (!list)
1938 return NULL;
1939 PyList_SET_ITEM(list, 0, self->data);
1940 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1941 self->data = list;
1942 }
1943 }
1944
1945 Py_RETURN_NONE;
1946}
1947
1948LOCAL(PyObject*)
1949treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1950{
1951 PyObject* item;
1952
1953 if (self->data) {
1954 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001955 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001956 self->last->text = JOIN_SET(
1957 self->data, PyList_CheckExact(self->data)
1958 );
1959 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001960 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001961 self->last->tail = JOIN_SET(
1962 self->data, PyList_CheckExact(self->data)
1963 );
1964 }
1965 self->data = NULL;
1966 }
1967
1968 if (self->index == 0) {
1969 PyErr_SetString(
1970 PyExc_IndexError,
1971 "pop from empty stack"
1972 );
1973 return NULL;
1974 }
1975
1976 self->index--;
1977
1978 item = PyList_GET_ITEM(self->stack, self->index);
1979 Py_INCREF(item);
1980
1981 Py_DECREF(self->last);
1982
1983 self->last = (ElementObject*) self->this;
1984 self->this = (ElementObject*) item;
1985
1986 if (self->end_event_obj) {
1987 PyObject* res;
1988 PyObject* action = self->end_event_obj;
1989 PyObject* node = (PyObject*) self->last;
1990 res = PyTuple_New(2);
1991 if (res) {
1992 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1993 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1994 PyList_Append(self->events, res);
1995 Py_DECREF(res);
1996 } else
1997 PyErr_Clear(); /* FIXME: propagate error */
1998 }
1999
2000 Py_INCREF(self->last);
2001 return (PyObject*) self->last;
2002}
2003
2004LOCAL(void)
2005treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002006 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002007{
2008 PyObject* res;
2009 PyObject* action;
2010 PyObject* parcel;
2011
2012 if (!self->events)
2013 return;
2014
2015 if (start) {
2016 if (!self->start_ns_event_obj)
2017 return;
2018 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002019 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002020 if (!parcel)
2021 return;
2022 Py_INCREF(action);
2023 } else {
2024 if (!self->end_ns_event_obj)
2025 return;
2026 action = self->end_ns_event_obj;
2027 Py_INCREF(action);
2028 parcel = Py_None;
2029 Py_INCREF(parcel);
2030 }
2031
2032 res = PyTuple_New(2);
2033
2034 if (res) {
2035 PyTuple_SET_ITEM(res, 0, action);
2036 PyTuple_SET_ITEM(res, 1, parcel);
2037 PyList_Append(self->events, res);
2038 Py_DECREF(res);
2039 } else
2040 PyErr_Clear(); /* FIXME: propagate error */
2041}
2042
2043/* -------------------------------------------------------------------- */
2044/* methods (in alphabetical order) */
2045
2046static PyObject*
2047treebuilder_data(TreeBuilderObject* self, PyObject* args)
2048{
2049 PyObject* data;
2050 if (!PyArg_ParseTuple(args, "O:data", &data))
2051 return NULL;
2052
2053 return treebuilder_handle_data(self, data);
2054}
2055
2056static PyObject*
2057treebuilder_end(TreeBuilderObject* self, PyObject* args)
2058{
2059 PyObject* tag;
2060 if (!PyArg_ParseTuple(args, "O:end", &tag))
2061 return NULL;
2062
2063 return treebuilder_handle_end(self, tag);
2064}
2065
2066LOCAL(PyObject*)
2067treebuilder_done(TreeBuilderObject* self)
2068{
2069 PyObject* res;
2070
2071 /* FIXME: check stack size? */
2072
2073 if (self->root)
2074 res = self->root;
2075 else
2076 res = Py_None;
2077
2078 Py_INCREF(res);
2079 return res;
2080}
2081
2082static PyObject*
2083treebuilder_close(TreeBuilderObject* self, PyObject* args)
2084{
2085 if (!PyArg_ParseTuple(args, ":close"))
2086 return NULL;
2087
2088 return treebuilder_done(self);
2089}
2090
2091static PyObject*
2092treebuilder_start(TreeBuilderObject* self, PyObject* args)
2093{
2094 PyObject* tag;
2095 PyObject* attrib = Py_None;
2096 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2097 return NULL;
2098
2099 return treebuilder_handle_start(self, tag, attrib);
2100}
2101
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002102static PyMethodDef treebuilder_methods[] = {
2103 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2104 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2105 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002106 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2107 {NULL, NULL}
2108};
2109
Neal Norwitz227b5332006-03-22 09:28:35 +00002110static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002111 PyVarObject_HEAD_INIT(NULL, 0)
2112 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002113 /* methods */
2114 (destructor)treebuilder_dealloc, /* tp_dealloc */
2115 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002116 0, /* tp_getattr */
2117 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002118 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002119 0, /* tp_repr */
2120 0, /* tp_as_number */
2121 0, /* tp_as_sequence */
2122 0, /* tp_as_mapping */
2123 0, /* tp_hash */
2124 0, /* tp_call */
2125 0, /* tp_str */
2126 0, /* tp_getattro */
2127 0, /* tp_setattro */
2128 0, /* tp_as_buffer */
2129 Py_TPFLAGS_DEFAULT, /* tp_flags */
2130 0, /* tp_doc */
2131 0, /* tp_traverse */
2132 0, /* tp_clear */
2133 0, /* tp_richcompare */
2134 0, /* tp_weaklistoffset */
2135 0, /* tp_iter */
2136 0, /* tp_iternext */
2137 treebuilder_methods, /* tp_methods */
2138 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002139};
2140
2141/* ==================================================================== */
2142/* the expat interface */
2143
2144#if defined(USE_EXPAT)
2145
2146#include "expat.h"
2147
2148#if defined(USE_PYEXPAT_CAPI)
2149#include "pyexpat.h"
2150static struct PyExpat_CAPI* expat_capi;
2151#define EXPAT(func) (expat_capi->func)
2152#else
2153#define EXPAT(func) (XML_##func)
2154#endif
2155
2156typedef struct {
2157 PyObject_HEAD
2158
2159 XML_Parser parser;
2160
2161 PyObject* target;
2162 PyObject* entity;
2163
2164 PyObject* names;
2165
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002166 PyObject* handle_start;
2167 PyObject* handle_data;
2168 PyObject* handle_end;
2169
2170 PyObject* handle_comment;
2171 PyObject* handle_pi;
2172
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002173 PyObject* handle_close;
2174
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002175} XMLParserObject;
2176
Neal Norwitz227b5332006-03-22 09:28:35 +00002177static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002178
2179/* helpers */
2180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002181LOCAL(PyObject*)
2182makeuniversal(XMLParserObject* self, const char* string)
2183{
2184 /* convert a UTF-8 tag/attribute name from the expat parser
2185 to a universal name string */
2186
2187 int size = strlen(string);
2188 PyObject* key;
2189 PyObject* value;
2190
2191 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002192 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002193 if (!key)
2194 return NULL;
2195
2196 value = PyDict_GetItem(self->names, key);
2197
2198 if (value) {
2199 Py_INCREF(value);
2200 } else {
2201 /* new name. convert to universal name, and decode as
2202 necessary */
2203
2204 PyObject* tag;
2205 char* p;
2206 int i;
2207
2208 /* look for namespace separator */
2209 for (i = 0; i < size; i++)
2210 if (string[i] == '}')
2211 break;
2212 if (i != size) {
2213 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002214 tag = PyBytes_FromStringAndSize(NULL, size+1);
2215 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002216 p[0] = '{';
2217 memcpy(p+1, string, size);
2218 size++;
2219 } else {
2220 /* plain name; use key as tag */
2221 Py_INCREF(key);
2222 tag = key;
2223 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002224
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002225 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002226 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002227 value = PyUnicode_DecodeUTF8(p, size, "strict");
2228 Py_DECREF(tag);
2229 if (!value) {
2230 Py_DECREF(key);
2231 return NULL;
2232 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002233
2234 /* add to names dictionary */
2235 if (PyDict_SetItem(self->names, key, value) < 0) {
2236 Py_DECREF(key);
2237 Py_DECREF(value);
2238 return NULL;
2239 }
2240 }
2241
2242 Py_DECREF(key);
2243 return value;
2244}
2245
Eli Bendersky5b77d812012-03-16 08:20:05 +02002246/* Set the ParseError exception with the given parameters.
2247 * If message is not NULL, it's used as the error string. Otherwise, the
2248 * message string is the default for the given error_code.
2249*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002250static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002251expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002252{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002253 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002254
Victor Stinner499dfcf2011-03-21 13:26:24 +01002255 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002256 message ? message : EXPAT(ErrorString)(error_code),
2257 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002258 if (errmsg == NULL)
2259 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002260
Victor Stinner499dfcf2011-03-21 13:26:24 +01002261 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2262 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002263 if (!error)
2264 return;
2265
Eli Bendersky5b77d812012-03-16 08:20:05 +02002266 /* Add code and position attributes */
2267 code = PyLong_FromLong((long)error_code);
2268 if (!code) {
2269 Py_DECREF(error);
2270 return;
2271 }
2272 if (PyObject_SetAttrString(error, "code", code) == -1) {
2273 Py_DECREF(error);
2274 Py_DECREF(code);
2275 return;
2276 }
2277 Py_DECREF(code);
2278
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002279 position = Py_BuildValue("(ii)", line, column);
2280 if (!position) {
2281 Py_DECREF(error);
2282 return;
2283 }
2284 if (PyObject_SetAttrString(error, "position", position) == -1) {
2285 Py_DECREF(error);
2286 Py_DECREF(position);
2287 return;
2288 }
2289 Py_DECREF(position);
2290
2291 PyErr_SetObject(elementtree_parseerror_obj, error);
2292 Py_DECREF(error);
2293}
2294
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002295/* -------------------------------------------------------------------- */
2296/* handlers */
2297
2298static void
2299expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2300 int data_len)
2301{
2302 PyObject* key;
2303 PyObject* value;
2304 PyObject* res;
2305
2306 if (data_len < 2 || data_in[0] != '&')
2307 return;
2308
Neal Norwitz0269b912007-08-08 06:56:02 +00002309 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002310 if (!key)
2311 return;
2312
2313 value = PyDict_GetItem(self->entity, key);
2314
2315 if (value) {
2316 if (TreeBuilder_CheckExact(self->target))
2317 res = treebuilder_handle_data(
2318 (TreeBuilderObject*) self->target, value
2319 );
2320 else if (self->handle_data)
2321 res = PyObject_CallFunction(self->handle_data, "O", value);
2322 else
2323 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002324 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002325 } else if (!PyErr_Occurred()) {
2326 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002327 char message[128] = "undefined entity ";
2328 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002329 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002330 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002331 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002332 EXPAT(GetErrorColumnNumber)(self->parser),
2333 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002334 );
2335 }
2336
2337 Py_DECREF(key);
2338}
2339
2340static void
2341expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2342 const XML_Char **attrib_in)
2343{
2344 PyObject* res;
2345 PyObject* tag;
2346 PyObject* attrib;
2347 int ok;
2348
2349 /* tag name */
2350 tag = makeuniversal(self, tag_in);
2351 if (!tag)
2352 return; /* parser will look for errors */
2353
2354 /* attributes */
2355 if (attrib_in[0]) {
2356 attrib = PyDict_New();
2357 if (!attrib)
2358 return;
2359 while (attrib_in[0] && attrib_in[1]) {
2360 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002361 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002362 if (!key || !value) {
2363 Py_XDECREF(value);
2364 Py_XDECREF(key);
2365 Py_DECREF(attrib);
2366 return;
2367 }
2368 ok = PyDict_SetItem(attrib, key, value);
2369 Py_DECREF(value);
2370 Py_DECREF(key);
2371 if (ok < 0) {
2372 Py_DECREF(attrib);
2373 return;
2374 }
2375 attrib_in += 2;
2376 }
2377 } else {
2378 Py_INCREF(Py_None);
2379 attrib = Py_None;
2380 }
2381
2382 if (TreeBuilder_CheckExact(self->target))
2383 /* shortcut */
2384 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2385 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002386 else if (self->handle_start) {
2387 if (attrib == Py_None) {
2388 Py_DECREF(attrib);
2389 attrib = PyDict_New();
2390 if (!attrib)
2391 return;
2392 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002393 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002394 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002395 res = NULL;
2396
2397 Py_DECREF(tag);
2398 Py_DECREF(attrib);
2399
2400 Py_XDECREF(res);
2401}
2402
2403static void
2404expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2405 int data_len)
2406{
2407 PyObject* data;
2408 PyObject* res;
2409
Neal Norwitz0269b912007-08-08 06:56:02 +00002410 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002411 if (!data)
2412 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002413
2414 if (TreeBuilder_CheckExact(self->target))
2415 /* shortcut */
2416 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2417 else if (self->handle_data)
2418 res = PyObject_CallFunction(self->handle_data, "O", data);
2419 else
2420 res = NULL;
2421
2422 Py_DECREF(data);
2423
2424 Py_XDECREF(res);
2425}
2426
2427static void
2428expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2429{
2430 PyObject* tag;
2431 PyObject* res = NULL;
2432
2433 if (TreeBuilder_CheckExact(self->target))
2434 /* shortcut */
2435 /* the standard tree builder doesn't look at the end tag */
2436 res = treebuilder_handle_end(
2437 (TreeBuilderObject*) self->target, Py_None
2438 );
2439 else if (self->handle_end) {
2440 tag = makeuniversal(self, tag_in);
2441 if (tag) {
2442 res = PyObject_CallFunction(self->handle_end, "O", tag);
2443 Py_DECREF(tag);
2444 }
2445 }
2446
2447 Py_XDECREF(res);
2448}
2449
2450static void
2451expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2452 const XML_Char *uri)
2453{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002454 PyObject* sprefix = NULL;
2455 PyObject* suri = NULL;
2456
2457 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2458 if (!suri)
2459 return;
2460
2461 if (prefix)
2462 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2463 else
2464 sprefix = PyUnicode_FromString("");
2465 if (!sprefix) {
2466 Py_DECREF(suri);
2467 return;
2468 }
2469
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002470 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002471 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002472 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002473
2474 Py_DECREF(sprefix);
2475 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002476}
2477
2478static void
2479expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2480{
2481 treebuilder_handle_namespace(
2482 (TreeBuilderObject*) self->target, 0, NULL, NULL
2483 );
2484}
2485
2486static void
2487expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2488{
2489 PyObject* comment;
2490 PyObject* res;
2491
2492 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002493 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002494 if (comment) {
2495 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2496 Py_XDECREF(res);
2497 Py_DECREF(comment);
2498 }
2499 }
2500}
2501
2502static void
2503expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2504 const XML_Char* data_in)
2505{
2506 PyObject* target;
2507 PyObject* data;
2508 PyObject* res;
2509
2510 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002511 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2512 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002513 if (target && data) {
2514 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2515 Py_XDECREF(res);
2516 Py_DECREF(data);
2517 Py_DECREF(target);
2518 } else {
2519 Py_XDECREF(data);
2520 Py_XDECREF(target);
2521 }
2522 }
2523}
2524
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002525static int
2526expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2527 XML_Encoding *info)
2528{
2529 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002530 unsigned char s[256];
2531 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002532 void *data;
2533 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002534
2535 memset(info, 0, sizeof(XML_Encoding));
2536
2537 for (i = 0; i < 256; i++)
2538 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002539
Fredrik Lundhc3389992005-12-25 11:40:19 +00002540 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002541 if (!u)
2542 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002543 if (PyUnicode_READY(u))
2544 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002545
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002546 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002547 Py_DECREF(u);
2548 return XML_STATUS_ERROR;
2549 }
2550
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002551 kind = PyUnicode_KIND(u);
2552 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002553 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002554 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2555 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2556 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002557 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002558 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002559 }
2560
2561 Py_DECREF(u);
2562
2563 return XML_STATUS_OK;
2564}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002565
2566/* -------------------------------------------------------------------- */
2567/* constructor and destructor */
2568
2569static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002570xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002571{
2572 XMLParserObject* self;
2573 /* FIXME: does this need to be static? */
2574 static XML_Memory_Handling_Suite memory_handler;
2575
2576 PyObject* target = NULL;
2577 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002578 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002579 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2580 &target, &encoding))
2581 return NULL;
2582
2583#if defined(USE_PYEXPAT_CAPI)
2584 if (!expat_capi) {
2585 PyErr_SetString(
2586 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2587 );
2588 return NULL;
2589 }
2590#endif
2591
2592 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2593 if (self == NULL)
2594 return NULL;
2595
2596 self->entity = PyDict_New();
2597 if (!self->entity) {
2598 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002599 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002600 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002601
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002602 self->names = PyDict_New();
2603 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002604 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002605 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002606 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002607 }
2608
2609 memory_handler.malloc_fcn = PyObject_Malloc;
2610 memory_handler.realloc_fcn = PyObject_Realloc;
2611 memory_handler.free_fcn = PyObject_Free;
2612
2613 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2614 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002615 PyObject_Del(self->names);
2616 PyObject_Del(self->entity);
2617 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002618 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002619 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002620 }
2621
2622 /* setup target handlers */
2623 if (!target) {
2624 target = treebuilder_new();
2625 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002626 EXPAT(ParserFree)(self->parser);
2627 PyObject_Del(self->names);
2628 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002629 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002630 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002631 }
2632 } else
2633 Py_INCREF(target);
2634 self->target = target;
2635
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002636 self->handle_start = PyObject_GetAttrString(target, "start");
2637 self->handle_data = PyObject_GetAttrString(target, "data");
2638 self->handle_end = PyObject_GetAttrString(target, "end");
2639 self->handle_comment = PyObject_GetAttrString(target, "comment");
2640 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002641 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002642
2643 PyErr_Clear();
2644
2645 /* configure parser */
2646 EXPAT(SetUserData)(self->parser, self);
2647 EXPAT(SetElementHandler)(
2648 self->parser,
2649 (XML_StartElementHandler) expat_start_handler,
2650 (XML_EndElementHandler) expat_end_handler
2651 );
2652 EXPAT(SetDefaultHandlerExpand)(
2653 self->parser,
2654 (XML_DefaultHandler) expat_default_handler
2655 );
2656 EXPAT(SetCharacterDataHandler)(
2657 self->parser,
2658 (XML_CharacterDataHandler) expat_data_handler
2659 );
2660 if (self->handle_comment)
2661 EXPAT(SetCommentHandler)(
2662 self->parser,
2663 (XML_CommentHandler) expat_comment_handler
2664 );
2665 if (self->handle_pi)
2666 EXPAT(SetProcessingInstructionHandler)(
2667 self->parser,
2668 (XML_ProcessingInstructionHandler) expat_pi_handler
2669 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002670 EXPAT(SetUnknownEncodingHandler)(
2671 self->parser,
2672 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2673 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002674
2675 ALLOC(sizeof(XMLParserObject), "create expatparser");
2676
2677 return (PyObject*) self;
2678}
2679
2680static void
2681xmlparser_dealloc(XMLParserObject* self)
2682{
2683 EXPAT(ParserFree)(self->parser);
2684
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002685 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002686 Py_XDECREF(self->handle_pi);
2687 Py_XDECREF(self->handle_comment);
2688 Py_XDECREF(self->handle_end);
2689 Py_XDECREF(self->handle_data);
2690 Py_XDECREF(self->handle_start);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002691
2692 Py_DECREF(self->target);
2693 Py_DECREF(self->entity);
2694 Py_DECREF(self->names);
2695
2696 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2697
2698 PyObject_Del(self);
2699}
2700
2701/* -------------------------------------------------------------------- */
2702/* methods (in alphabetical order) */
2703
2704LOCAL(PyObject*)
2705expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2706{
2707 int ok;
2708
2709 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2710
2711 if (PyErr_Occurred())
2712 return NULL;
2713
2714 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002715 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002716 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002717 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002718 EXPAT(GetErrorColumnNumber)(self->parser),
2719 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002720 );
2721 return NULL;
2722 }
2723
2724 Py_RETURN_NONE;
2725}
2726
2727static PyObject*
2728xmlparser_close(XMLParserObject* self, PyObject* args)
2729{
2730 /* end feeding data to parser */
2731
2732 PyObject* res;
2733 if (!PyArg_ParseTuple(args, ":close"))
2734 return NULL;
2735
2736 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002737 if (!res)
2738 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002740 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002741 Py_DECREF(res);
2742 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002743 } if (self->handle_close) {
2744 Py_DECREF(res);
2745 return PyObject_CallFunction(self->handle_close, "");
2746 } else
2747 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002748}
2749
2750static PyObject*
2751xmlparser_feed(XMLParserObject* self, PyObject* args)
2752{
2753 /* feed data to parser */
2754
2755 char* data;
2756 int data_len;
2757 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2758 return NULL;
2759
2760 return expat_parse(self, data, data_len, 0);
2761}
2762
2763static PyObject*
2764xmlparser_parse(XMLParserObject* self, PyObject* args)
2765{
2766 /* (internal) parse until end of input stream */
2767
2768 PyObject* reader;
2769 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02002770 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002771 PyObject* res;
2772
2773 PyObject* fileobj;
2774 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2775 return NULL;
2776
2777 reader = PyObject_GetAttrString(fileobj, "read");
2778 if (!reader)
2779 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002780
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002781 /* read from open file object */
2782 for (;;) {
2783
2784 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2785
2786 if (!buffer) {
2787 /* read failed (e.g. due to KeyboardInterrupt) */
2788 Py_DECREF(reader);
2789 return NULL;
2790 }
2791
Eli Benderskyf996e772012-03-16 05:53:30 +02002792 if (PyUnicode_CheckExact(buffer)) {
2793 /* A unicode object is encoded into bytes using UTF-8 */
2794 if (PyUnicode_GET_SIZE(buffer) == 0) {
2795 Py_DECREF(buffer);
2796 break;
2797 }
2798 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
2799 if (!temp) {
2800 /* Propagate exception from PyUnicode_AsEncodedString */
2801 Py_DECREF(buffer);
2802 Py_DECREF(reader);
2803 return NULL;
2804 }
2805
2806 /* Here we no longer need the original buffer since it contains
2807 * unicode. Make it point to the encoded bytes object.
2808 */
2809 Py_DECREF(buffer);
2810 buffer = temp;
2811 }
2812 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002813 Py_DECREF(buffer);
2814 break;
2815 }
2816
2817 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002818 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002819 );
2820
2821 Py_DECREF(buffer);
2822
2823 if (!res) {
2824 Py_DECREF(reader);
2825 return NULL;
2826 }
2827 Py_DECREF(res);
2828
2829 }
2830
2831 Py_DECREF(reader);
2832
2833 res = expat_parse(self, "", 0, 1);
2834
2835 if (res && TreeBuilder_CheckExact(self->target)) {
2836 Py_DECREF(res);
2837 return treebuilder_done((TreeBuilderObject*) self->target);
2838 }
2839
2840 return res;
2841}
2842
2843static PyObject*
2844xmlparser_setevents(XMLParserObject* self, PyObject* args)
2845{
2846 /* activate element event reporting */
2847
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002848 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002849 TreeBuilderObject* target;
2850
2851 PyObject* events; /* event collector */
2852 PyObject* event_set = Py_None;
2853 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2854 &event_set))
2855 return NULL;
2856
2857 if (!TreeBuilder_CheckExact(self->target)) {
2858 PyErr_SetString(
2859 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01002860 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002861 "targets"
2862 );
2863 return NULL;
2864 }
2865
2866 target = (TreeBuilderObject*) self->target;
2867
2868 Py_INCREF(events);
2869 Py_XDECREF(target->events);
2870 target->events = events;
2871
2872 /* clear out existing events */
2873 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2874 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2875 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2876 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2877
2878 if (event_set == Py_None) {
2879 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002880 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002881 Py_RETURN_NONE;
2882 }
2883
2884 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2885 goto error;
2886
2887 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2888 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2889 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002890 if (PyUnicode_Check(item)) {
2891 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002892 if (event == NULL)
2893 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002894 } else if (PyBytes_Check(item))
2895 event = PyBytes_AS_STRING(item);
2896 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002897 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002898 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002899 if (strcmp(event, "start") == 0) {
2900 Py_INCREF(item);
2901 target->start_event_obj = item;
2902 } else if (strcmp(event, "end") == 0) {
2903 Py_INCREF(item);
2904 Py_XDECREF(target->end_event_obj);
2905 target->end_event_obj = item;
2906 } else if (strcmp(event, "start-ns") == 0) {
2907 Py_INCREF(item);
2908 Py_XDECREF(target->start_ns_event_obj);
2909 target->start_ns_event_obj = item;
2910 EXPAT(SetNamespaceDeclHandler)(
2911 self->parser,
2912 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2913 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2914 );
2915 } else if (strcmp(event, "end-ns") == 0) {
2916 Py_INCREF(item);
2917 Py_XDECREF(target->end_ns_event_obj);
2918 target->end_ns_event_obj = item;
2919 EXPAT(SetNamespaceDeclHandler)(
2920 self->parser,
2921 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2922 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2923 );
2924 } else {
2925 PyErr_Format(
2926 PyExc_ValueError,
2927 "unknown event '%s'", event
2928 );
2929 return NULL;
2930 }
2931 }
2932
2933 Py_RETURN_NONE;
2934
2935 error:
2936 PyErr_SetString(
2937 PyExc_TypeError,
2938 "invalid event tuple"
2939 );
2940 return NULL;
2941}
2942
2943static PyMethodDef xmlparser_methods[] = {
2944 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2945 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2946 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2947 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2948 {NULL, NULL}
2949};
2950
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002951static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002952xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002953{
Alexander Belopolskye239d232010-12-08 23:31:48 +00002954 if (PyUnicode_Check(nameobj)) {
2955 PyObject* res;
2956 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
2957 res = self->entity;
2958 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
2959 res = self->target;
2960 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
2961 return PyUnicode_FromFormat(
2962 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002963 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00002964 }
2965 else
2966 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002967
Alexander Belopolskye239d232010-12-08 23:31:48 +00002968 Py_INCREF(res);
2969 return res;
2970 }
2971 generic:
2972 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002973}
2974
Neal Norwitz227b5332006-03-22 09:28:35 +00002975static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002976 PyVarObject_HEAD_INIT(NULL, 0)
2977 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002978 /* methods */
2979 (destructor)xmlparser_dealloc, /* tp_dealloc */
2980 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002981 0, /* tp_getattr */
2982 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002983 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002984 0, /* tp_repr */
2985 0, /* tp_as_number */
2986 0, /* tp_as_sequence */
2987 0, /* tp_as_mapping */
2988 0, /* tp_hash */
2989 0, /* tp_call */
2990 0, /* tp_str */
2991 (getattrofunc)xmlparser_getattro, /* tp_getattro */
2992 0, /* tp_setattro */
2993 0, /* tp_as_buffer */
2994 Py_TPFLAGS_DEFAULT, /* tp_flags */
2995 0, /* tp_doc */
2996 0, /* tp_traverse */
2997 0, /* tp_clear */
2998 0, /* tp_richcompare */
2999 0, /* tp_weaklistoffset */
3000 0, /* tp_iter */
3001 0, /* tp_iternext */
3002 xmlparser_methods, /* tp_methods */
3003 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003004};
3005
3006#endif
3007
3008/* ==================================================================== */
3009/* python module interface */
3010
3011static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003012 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
3013 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
3014#if defined(USE_EXPAT)
3015 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016#endif
3017 {NULL, NULL}
3018};
3019
Martin v. Löwis1a214512008-06-11 05:26:20 +00003020
3021static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003022 PyModuleDef_HEAD_INIT,
3023 "_elementtree",
3024 NULL,
3025 -1,
3026 _functions,
3027 NULL,
3028 NULL,
3029 NULL,
3030 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003031};
3032
Neal Norwitzf6657e62006-12-28 04:47:50 +00003033PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003034PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003035{
Eli Bendersky828efde2012-04-05 05:40:58 +03003036 PyObject *m, *g, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003037 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003038
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003039 /* Initialize object types */
3040 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003041 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003042 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003043 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003044#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003045 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003046 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003047#endif
3048
Martin v. Löwis1a214512008-06-11 05:26:20 +00003049 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003050 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003051 return NULL;
3052
3053 /* The code below requires that the module gets already added
3054 to sys.modules. */
3055 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003056 _elementtreemodule.m_name,
3057 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003058
3059 /* python glue code */
3060
3061 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003062 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003063 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003064
3065 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
3066
3067 bootstrap = (
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003068 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003069 " if tag == '*':\n"
3070 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003071 " if tag is None or node.tag == tag:\n"
3072 " yield node\n"
3073 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003074 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003075 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003076
3077 "def itertext(node):\n" /* helper */
3078 " if node.text:\n"
3079 " yield node.text\n"
3080 " for e in node:\n"
3081 " for s in e.itertext():\n"
3082 " yield s\n"
3083 " if e.tail:\n"
3084 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003085
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003086 );
3087
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003088 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3089 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003090
Eli Bendersky828efde2012-04-05 05:40:58 +03003091 if (!(temp = PyImport_ImportModule("copy")))
3092 return NULL;
3093 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3094 Py_XDECREF(temp);
3095
3096 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3097 return NULL;
3098
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003099 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3100 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003101
3102#if defined(USE_PYEXPAT_CAPI)
3103 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003104 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3105 if (expat_capi) {
3106 /* check that it's usable */
3107 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3108 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3109 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3110 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3111 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3112 expat_capi = NULL;
3113 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003114#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003115
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003116 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003117 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003118 );
3119 Py_INCREF(elementtree_parseerror_obj);
3120 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3121
Eli Bendersky092af1f2012-03-04 07:14:03 +02003122 Py_INCREF((PyObject *)&Element_Type);
3123 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3124
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003125 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003126}