blob: 42634977dad2125f89c623e0b0494dc9e0f7305e [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Keep this defined to build the expat-based XMLParser type. */
#define USE_EXPAT

/* Define this to route every expat call through the expat library
   embedded in pyexpat. */
/* #define USE_PYEXPAT_CAPI */

/* Number of children an element can store without any additional
   memory allocation. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value such that 80-90% of all nodes
   have no more than this many children.  Setting it to zero minimizes
   the size of the element structure itself (which only helps when
   there are lots of leaf nodes with attributes). */

/* Note also that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
77
78/* -------------------------------------------------------------------- */
79
80#if 0
81static int memory = 0;
82#define ALLOC(size, comment)\
83do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
84#define RELEASE(size, comment)\
85do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
86#else
87#define ALLOC(size, comment)
88#define RELEASE(size, comment)
89#endif
90
91/* compiler tweaks */
92#if defined(_MSC_VER)
93#define LOCAL(type) static __inline type __fastcall
94#else
95#define LOCAL(type) static type
96#endif
97
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000098/* macros used to store 'join' flags in string object pointers. note
99 that all use of text and tail as object pointers must be wrapped in
100 JOIN_OBJ. see comments in the ElementObject definition for more
101 info. */
102#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
103#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
104#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
105
106/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000107static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000108static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000109static PyObject* elementtree_iter_obj;
110static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000111static PyObject* elementpath_obj;
112
113/* helpers */
114
115LOCAL(PyObject*)
116deepcopy(PyObject* object, PyObject* memo)
117{
118 /* do a deep copy of the given object */
119
120 PyObject* args;
121 PyObject* result;
122
123 if (!elementtree_deepcopy_obj) {
124 PyErr_SetString(
125 PyExc_RuntimeError,
126 "deepcopy helper not found"
127 );
128 return NULL;
129 }
130
131 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000132 if (!args)
133 return NULL;
134
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
136 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
137
138 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
139
140 Py_DECREF(args);
141
142 return result;
143}
144
145LOCAL(PyObject*)
146list_join(PyObject* list)
147{
148 /* join list elements (destroying the list in the process) */
149
150 PyObject* joiner;
151 PyObject* function;
152 PyObject* args;
153 PyObject* result;
154
155 switch (PyList_GET_SIZE(list)) {
156 case 0:
157 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000158 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159 case 1:
160 result = PyList_GET_ITEM(list, 0);
161 Py_INCREF(result);
162 Py_DECREF(list);
163 return result;
164 }
165
166 /* two or more elements: slice out a suitable separator from the
167 first member, and use that to join the entire list */
168
169 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
170 if (!joiner)
171 return NULL;
172
173 function = PyObject_GetAttrString(joiner, "join");
174 if (!function) {
175 Py_DECREF(joiner);
176 return NULL;
177 }
178
179 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000180 if (!args)
181 return NULL;
182
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183 PyTuple_SET_ITEM(args, 0, list);
184
185 result = PyObject_CallObject(function, args);
186
187 Py_DECREF(args); /* also removes list */
188 Py_DECREF(function);
189 Py_DECREF(joiner);
190
191 return result;
192}
193
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000194/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200195/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000196
197typedef struct {
198
199 /* attributes (a dictionary object), or None if no attributes */
200 PyObject* attrib;
201
202 /* child elements */
203 int length; /* actual number of items */
204 int allocated; /* allocated items */
205
206 /* this either points to _children or to a malloced buffer */
207 PyObject* *children;
208
209 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100210
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000211} ElementObjectExtra;
212
213typedef struct {
214 PyObject_HEAD
215
216 /* element tag (a string). */
217 PyObject* tag;
218
219 /* text before first child. note that this is a tagged pointer;
220 use JOIN_OBJ to get the object pointer. the join flag is used
221 to distinguish lists created by the tree builder from lists
222 assigned to the attribute by application code; the former
223 should be joined before being returned to the user, the latter
224 should be left intact. */
225 PyObject* text;
226
227 /* text after this element, in parent. note that this is a tagged
228 pointer; use JOIN_OBJ to get the object pointer. */
229 PyObject* tail;
230
231 ElementObjectExtra* extra;
232
Eli Benderskyebf37a22012-04-03 22:02:37 +0300233 PyObject *weakreflist; /* For tp_weaklistoffset */
234
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235} ElementObject;
236
Neal Norwitz227b5332006-03-22 09:28:35 +0000237static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000238
Christian Heimes90aa7642007-12-19 02:45:37 +0000239#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000240
241/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200242/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000243
244LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200245create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246{
247 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
248 if (!self->extra)
249 return -1;
250
251 if (!attrib)
252 attrib = Py_None;
253
254 Py_INCREF(attrib);
255 self->extra->attrib = attrib;
256
257 self->extra->length = 0;
258 self->extra->allocated = STATIC_CHILDREN;
259 self->extra->children = self->extra->_children;
260
261 return 0;
262}
263
264LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200265dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266{
Eli Bendersky08b85292012-04-04 15:55:07 +0300267 ElementObjectExtra *myextra;
268 int i;
269
Eli Benderskyebf37a22012-04-03 22:02:37 +0300270 if (!self->extra)
271 return;
272
273 /* Avoid DECREFs calling into this code again (cycles, etc.)
274 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300275 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300276 self->extra = NULL;
277
278 Py_DECREF(myextra->attrib);
279
Eli Benderskyebf37a22012-04-03 22:02:37 +0300280 for (i = 0; i < myextra->length; i++)
281 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282
Eli Benderskyebf37a22012-04-03 22:02:37 +0300283 if (myextra->children != myextra->_children)
284 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285
Eli Benderskyebf37a22012-04-03 22:02:37 +0300286 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287}
288
Eli Bendersky092af1f2012-03-04 07:14:03 +0200289/* Convenience internal function to create new Element objects with the given
290 * tag and attributes.
291*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000292LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200293create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000294{
295 ElementObject* self;
296
Eli Bendersky0192ba32012-03-30 16:38:33 +0300297 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000298 if (self == NULL)
299 return NULL;
300
301 /* use None for empty dictionaries */
302 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
303 attrib = Py_None;
304
305 self->extra = NULL;
306
307 if (attrib != Py_None) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200308 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000309 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000310 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000311 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000312 }
313
314 Py_INCREF(tag);
315 self->tag = tag;
316
317 Py_INCREF(Py_None);
318 self->text = Py_None;
319
320 Py_INCREF(Py_None);
321 self->tail = Py_None;
322
Eli Benderskyebf37a22012-04-03 22:02:37 +0300323 self->weakreflist = NULL;
324
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000325 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300326 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000327 return (PyObject*) self;
328}
329
Eli Bendersky092af1f2012-03-04 07:14:03 +0200330static PyObject *
331element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
332{
333 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
334 if (e != NULL) {
335 Py_INCREF(Py_None);
336 e->tag = Py_None;
337
338 Py_INCREF(Py_None);
339 e->text = Py_None;
340
341 Py_INCREF(Py_None);
342 e->tail = Py_None;
343
344 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300345 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200346 }
347 return (PyObject *)e;
348}
349
350static int
351element_init(PyObject *self, PyObject *args, PyObject *kwds)
352{
353 PyObject *tag;
354 PyObject *tmp;
355 PyObject *attrib = NULL;
356 ElementObject *self_elem;
357
358 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
359 return -1;
360
361 if (attrib || kwds) {
362 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
363 if (!attrib)
364 return -1;
365 if (kwds)
366 PyDict_Update(attrib, kwds);
367 } else {
368 Py_INCREF(Py_None);
369 attrib = Py_None;
370 }
371
372 self_elem = (ElementObject *)self;
373
374 /* Use None for empty dictionaries */
375 if (PyDict_CheckExact(attrib) && PyDict_Size(attrib) == 0) {
376 Py_INCREF(Py_None);
377 attrib = Py_None;
378 }
379
380 if (attrib != Py_None) {
381 if (create_extra(self_elem, attrib) < 0) {
382 PyObject_Del(self_elem);
383 return -1;
384 }
385 }
386
387 /* If create_extra needed attrib, it took a reference to it, so we can
388 * release ours anyway.
389 */
390 Py_DECREF(attrib);
391
392 /* Replace the objects already pointed to by tag, text and tail. */
393 tmp = self_elem->tag;
394 self_elem->tag = tag;
395 Py_INCREF(tag);
396 Py_DECREF(tmp);
397
398 tmp = self_elem->text;
399 self_elem->text = Py_None;
400 Py_INCREF(Py_None);
401 Py_DECREF(JOIN_OBJ(tmp));
402
403 tmp = self_elem->tail;
404 self_elem->tail = Py_None;
405 Py_INCREF(Py_None);
406 Py_DECREF(JOIN_OBJ(tmp));
407
408 return 0;
409}
410
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000411LOCAL(int)
412element_resize(ElementObject* self, int extra)
413{
414 int size;
415 PyObject* *children;
416
417 /* make sure self->children can hold the given number of extra
418 elements. set an exception and return -1 if allocation failed */
419
420 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000422
423 size = self->extra->length + extra;
424
425 if (size > self->extra->allocated) {
426 /* use Python 2.4's list growth strategy */
427 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000428 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100429 * which needs at least 4 bytes.
430 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000431 * be safe.
432 */
433 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000434 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000435 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100436 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000437 * false alarm always assume at least one child to be safe.
438 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000439 children = PyObject_Realloc(self->extra->children,
440 size * sizeof(PyObject*));
441 if (!children)
442 goto nomemory;
443 } else {
444 children = PyObject_Malloc(size * sizeof(PyObject*));
445 if (!children)
446 goto nomemory;
447 /* copy existing children from static area to malloc buffer */
448 memcpy(children, self->extra->children,
449 self->extra->length * sizeof(PyObject*));
450 }
451 self->extra->children = children;
452 self->extra->allocated = size;
453 }
454
455 return 0;
456
457 nomemory:
458 PyErr_NoMemory();
459 return -1;
460}
461
462LOCAL(int)
463element_add_subelement(ElementObject* self, PyObject* element)
464{
465 /* add a child element to a parent */
466
467 if (element_resize(self, 1) < 0)
468 return -1;
469
470 Py_INCREF(element);
471 self->extra->children[self->extra->length] = element;
472
473 self->extra->length++;
474
475 return 0;
476}
477
478LOCAL(PyObject*)
479element_get_attrib(ElementObject* self)
480{
481 /* return borrowed reference to attrib dictionary */
482 /* note: this function assumes that the extra section exists */
483
484 PyObject* res = self->extra->attrib;
485
486 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000487 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000488 /* create missing dictionary */
489 res = PyDict_New();
490 if (!res)
491 return NULL;
492 self->extra->attrib = res;
493 }
494
495 return res;
496}
497
498LOCAL(PyObject*)
499element_get_text(ElementObject* self)
500{
501 /* return borrowed reference to text attribute */
502
503 PyObject* res = self->text;
504
505 if (JOIN_GET(res)) {
506 res = JOIN_OBJ(res);
507 if (PyList_CheckExact(res)) {
508 res = list_join(res);
509 if (!res)
510 return NULL;
511 self->text = res;
512 }
513 }
514
515 return res;
516}
517
518LOCAL(PyObject*)
519element_get_tail(ElementObject* self)
520{
521 /* return borrowed reference to text attribute */
522
523 PyObject* res = self->tail;
524
525 if (JOIN_GET(res)) {
526 res = JOIN_OBJ(res);
527 if (PyList_CheckExact(res)) {
528 res = list_join(res);
529 if (!res)
530 return NULL;
531 self->tail = res;
532 }
533 }
534
535 return res;
536}
537
538static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000539subelement(PyObject* self, PyObject* args, PyObject* kw)
540{
541 PyObject* elem;
542
543 ElementObject* parent;
544 PyObject* tag;
545 PyObject* attrib = NULL;
546 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
547 &Element_Type, &parent, &tag,
548 &PyDict_Type, &attrib))
549 return NULL;
550
551 if (attrib || kw) {
552 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
553 if (!attrib)
554 return NULL;
555 if (kw)
556 PyDict_Update(attrib, kw);
557 } else {
558 Py_INCREF(Py_None);
559 attrib = Py_None;
560 }
561
Eli Bendersky092af1f2012-03-04 07:14:03 +0200562 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563
564 Py_DECREF(attrib);
565
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000566 if (element_add_subelement(parent, elem) < 0) {
567 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000568 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000569 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000570
571 return elem;
572}
573
Eli Bendersky0192ba32012-03-30 16:38:33 +0300574static int
575element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
576{
577 Py_VISIT(self->tag);
578 Py_VISIT(JOIN_OBJ(self->text));
579 Py_VISIT(JOIN_OBJ(self->tail));
580
581 if (self->extra) {
582 int i;
583 Py_VISIT(self->extra->attrib);
584
585 for (i = 0; i < self->extra->length; ++i)
586 Py_VISIT(self->extra->children[i]);
587 }
588 return 0;
589}
590
591static int
592element_gc_clear(ElementObject *self)
593{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300594 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300595
596 /* The following is like Py_CLEAR for self->text and self->tail, but
597 * written explicitily because the real pointers hide behind access
598 * macros.
599 */
600 if (self->text) {
601 PyObject *tmp = JOIN_OBJ(self->text);
602 self->text = NULL;
603 Py_DECREF(tmp);
604 }
605
606 if (self->tail) {
607 PyObject *tmp = JOIN_OBJ(self->tail);
608 self->tail = NULL;
609 Py_DECREF(tmp);
610 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300611
612 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300613 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300614 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300615 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616 return 0;
617}
618
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000619static void
620element_dealloc(ElementObject* self)
621{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300623
624 if (self->weakreflist != NULL)
625 PyObject_ClearWeakRefs((PyObject *) self);
626
Eli Bendersky0192ba32012-03-30 16:38:33 +0300627 /* element_gc_clear clears all references and deallocates extra
628 */
629 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000630
631 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200632 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000633}
634
635/* -------------------------------------------------------------------- */
636/* methods (in alphabetical order) */
637
638static PyObject*
639element_append(ElementObject* self, PyObject* args)
640{
641 PyObject* element;
642 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
643 return NULL;
644
645 if (element_add_subelement(self, element) < 0)
646 return NULL;
647
648 Py_RETURN_NONE;
649}
650
651static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300652element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000653{
654 if (!PyArg_ParseTuple(args, ":clear"))
655 return NULL;
656
Eli Benderskyebf37a22012-04-03 22:02:37 +0300657 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000658
659 Py_INCREF(Py_None);
660 Py_DECREF(JOIN_OBJ(self->text));
661 self->text = Py_None;
662
663 Py_INCREF(Py_None);
664 Py_DECREF(JOIN_OBJ(self->tail));
665 self->tail = Py_None;
666
667 Py_RETURN_NONE;
668}
669
670static PyObject*
671element_copy(ElementObject* self, PyObject* args)
672{
673 int i;
674 ElementObject* element;
675
676 if (!PyArg_ParseTuple(args, ":__copy__"))
677 return NULL;
678
Eli Bendersky092af1f2012-03-04 07:14:03 +0200679 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000680 self->tag, (self->extra) ? self->extra->attrib : Py_None
681 );
682 if (!element)
683 return NULL;
684
685 Py_DECREF(JOIN_OBJ(element->text));
686 element->text = self->text;
687 Py_INCREF(JOIN_OBJ(element->text));
688
689 Py_DECREF(JOIN_OBJ(element->tail));
690 element->tail = self->tail;
691 Py_INCREF(JOIN_OBJ(element->tail));
692
693 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100694
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000695 if (element_resize(element, self->extra->length) < 0) {
696 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000698 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699
700 for (i = 0; i < self->extra->length; i++) {
701 Py_INCREF(self->extra->children[i]);
702 element->extra->children[i] = self->extra->children[i];
703 }
704
705 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100706
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000707 }
708
709 return (PyObject*) element;
710}
711
712static PyObject*
713element_deepcopy(ElementObject* self, PyObject* args)
714{
715 int i;
716 ElementObject* element;
717 PyObject* tag;
718 PyObject* attrib;
719 PyObject* text;
720 PyObject* tail;
721 PyObject* id;
722
723 PyObject* memo;
724 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
725 return NULL;
726
727 tag = deepcopy(self->tag, memo);
728 if (!tag)
729 return NULL;
730
731 if (self->extra) {
732 attrib = deepcopy(self->extra->attrib, memo);
733 if (!attrib) {
734 Py_DECREF(tag);
735 return NULL;
736 }
737 } else {
738 Py_INCREF(Py_None);
739 attrib = Py_None;
740 }
741
Eli Bendersky092af1f2012-03-04 07:14:03 +0200742 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743
744 Py_DECREF(tag);
745 Py_DECREF(attrib);
746
747 if (!element)
748 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100749
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 text = deepcopy(JOIN_OBJ(self->text), memo);
751 if (!text)
752 goto error;
753 Py_DECREF(element->text);
754 element->text = JOIN_SET(text, JOIN_GET(self->text));
755
756 tail = deepcopy(JOIN_OBJ(self->tail), memo);
757 if (!tail)
758 goto error;
759 Py_DECREF(element->tail);
760 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
761
762 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100763
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 if (element_resize(element, self->extra->length) < 0)
765 goto error;
766
767 for (i = 0; i < self->extra->length; i++) {
768 PyObject* child = deepcopy(self->extra->children[i], memo);
769 if (!child) {
770 element->extra->length = i;
771 goto error;
772 }
773 element->extra->children[i] = child;
774 }
775
776 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100777
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000778 }
779
780 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000781 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000782 if (!id)
783 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784
785 i = PyDict_SetItem(memo, id, (PyObject*) element);
786
787 Py_DECREF(id);
788
789 if (i < 0)
790 goto error;
791
792 return (PyObject*) element;
793
794 error:
795 Py_DECREF(element);
796 return NULL;
797}
798
799LOCAL(int)
800checkpath(PyObject* tag)
801{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000802 Py_ssize_t i;
803 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000804
805 /* check if a tag contains an xpath character */
806
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000807#define PATHCHAR(ch) \
808 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000809
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000810 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200811 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
812 void *data = PyUnicode_DATA(tag);
813 unsigned int kind = PyUnicode_KIND(tag);
814 for (i = 0; i < len; i++) {
815 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
816 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000817 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200818 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000819 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200820 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000821 return 1;
822 }
823 return 0;
824 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000825 if (PyBytes_Check(tag)) {
826 char *p = PyBytes_AS_STRING(tag);
827 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000828 if (p[i] == '{')
829 check = 0;
830 else if (p[i] == '}')
831 check = 1;
832 else if (check && PATHCHAR(p[i]))
833 return 1;
834 }
835 return 0;
836 }
837
838 return 1; /* unknown type; might be path expression */
839}
840
841static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000842element_extend(ElementObject* self, PyObject* args)
843{
844 PyObject* seq;
845 Py_ssize_t i, seqlen = 0;
846
847 PyObject* seq_in;
848 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
849 return NULL;
850
851 seq = PySequence_Fast(seq_in, "");
852 if (!seq) {
853 PyErr_Format(
854 PyExc_TypeError,
855 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
856 );
857 return NULL;
858 }
859
860 seqlen = PySequence_Size(seq);
861 for (i = 0; i < seqlen; i++) {
862 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +0200863 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
864 Py_DECREF(seq);
865 PyErr_Format(
866 PyExc_TypeError,
867 "expected an Element, not \"%.200s\"",
868 Py_TYPE(element)->tp_name);
869 return NULL;
870 }
871
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000872 if (element_add_subelement(self, element) < 0) {
873 Py_DECREF(seq);
874 return NULL;
875 }
876 }
877
878 Py_DECREF(seq);
879
880 Py_RETURN_NONE;
881}
882
883static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000884element_find(ElementObject* self, PyObject* args)
885{
886 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000887 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000888 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200889
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000890 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000891 return NULL;
892
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200893 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200894 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200895 return _PyObject_CallMethodId(
896 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000897 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200898 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000899
900 if (!self->extra)
901 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100902
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000903 for (i = 0; i < self->extra->length; i++) {
904 PyObject* item = self->extra->children[i];
905 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000906 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000907 Py_INCREF(item);
908 return item;
909 }
910 }
911
912 Py_RETURN_NONE;
913}
914
915static PyObject*
916element_findtext(ElementObject* self, PyObject* args)
917{
918 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000919 PyObject* tag;
920 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000921 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200922 _Py_IDENTIFIER(findtext);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200923
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000924 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000925 return NULL;
926
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000927 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200928 return _PyObject_CallMethodId(
929 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000930 );
931
932 if (!self->extra) {
933 Py_INCREF(default_value);
934 return default_value;
935 }
936
937 for (i = 0; i < self->extra->length; i++) {
938 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000939 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
940
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000941 PyObject* text = element_get_text(item);
942 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000943 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000944 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000945 return text;
946 }
947 }
948
949 Py_INCREF(default_value);
950 return default_value;
951}
952
953static PyObject*
954element_findall(ElementObject* self, PyObject* args)
955{
956 int i;
957 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000958 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000959 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200960
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000961 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000962 return NULL;
963
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200964 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200965 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200966 return _PyObject_CallMethodId(
967 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000968 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200969 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000970
971 out = PyList_New(0);
972 if (!out)
973 return NULL;
974
975 if (!self->extra)
976 return out;
977
978 for (i = 0; i < self->extra->length; i++) {
979 PyObject* item = self->extra->children[i];
980 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000981 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000982 if (PyList_Append(out, item) < 0) {
983 Py_DECREF(out);
984 return NULL;
985 }
986 }
987 }
988
989 return out;
990}
991
992static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000993element_iterfind(ElementObject* self, PyObject* args)
994{
995 PyObject* tag;
996 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200997 _Py_IDENTIFIER(iterfind);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200998
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000999 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
1000 return NULL;
1001
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001002 return _PyObject_CallMethodId(
1003 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001004 );
1005}
1006
1007static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001008element_get(ElementObject* self, PyObject* args)
1009{
1010 PyObject* value;
1011
1012 PyObject* key;
1013 PyObject* default_value = Py_None;
1014 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
1015 return NULL;
1016
1017 if (!self->extra || self->extra->attrib == Py_None)
1018 value = default_value;
1019 else {
1020 value = PyDict_GetItem(self->extra->attrib, key);
1021 if (!value)
1022 value = default_value;
1023 }
1024
1025 Py_INCREF(value);
1026 return value;
1027}
1028
1029static PyObject*
1030element_getchildren(ElementObject* self, PyObject* args)
1031{
1032 int i;
1033 PyObject* list;
1034
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001035 /* FIXME: report as deprecated? */
1036
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001037 if (!PyArg_ParseTuple(args, ":getchildren"))
1038 return NULL;
1039
1040 if (!self->extra)
1041 return PyList_New(0);
1042
1043 list = PyList_New(self->extra->length);
1044 if (!list)
1045 return NULL;
1046
1047 for (i = 0; i < self->extra->length; i++) {
1048 PyObject* item = self->extra->children[i];
1049 Py_INCREF(item);
1050 PyList_SET_ITEM(list, i, item);
1051 }
1052
1053 return list;
1054}
1055
1056static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001057element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001058{
1059 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001060
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001061 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001062 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001063 return NULL;
1064
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001065 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001066 PyErr_SetString(
1067 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001068 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001069 );
1070 return NULL;
1071 }
1072
1073 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001074 if (!args)
1075 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001076
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001077 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1078 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1079
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001080 result = PyObject_CallObject(elementtree_iter_obj, args);
1081
1082 Py_DECREF(args);
1083
1084 return result;
1085}
1086
1087
1088static PyObject*
1089element_itertext(ElementObject* self, PyObject* args)
1090{
1091 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001092
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001093 if (!PyArg_ParseTuple(args, ":itertext"))
1094 return NULL;
1095
1096 if (!elementtree_itertext_obj) {
1097 PyErr_SetString(
1098 PyExc_RuntimeError,
1099 "itertext helper not found"
1100 );
1101 return NULL;
1102 }
1103
1104 args = PyTuple_New(1);
1105 if (!args)
1106 return NULL;
1107
1108 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1109
1110 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001111
1112 Py_DECREF(args);
1113
1114 return result;
1115}
1116
1117static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001118element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001119{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001120 ElementObject* self = (ElementObject*) self_;
1121
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001122 if (!self->extra || index < 0 || index >= self->extra->length) {
1123 PyErr_SetString(
1124 PyExc_IndexError,
1125 "child index out of range"
1126 );
1127 return NULL;
1128 }
1129
1130 Py_INCREF(self->extra->children[index]);
1131 return self->extra->children[index];
1132}
1133
1134static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001135element_insert(ElementObject* self, PyObject* args)
1136{
1137 int i;
1138
1139 int index;
1140 PyObject* element;
1141 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1142 &Element_Type, &element))
1143 return NULL;
1144
1145 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001146 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001148 if (index < 0) {
1149 index += self->extra->length;
1150 if (index < 0)
1151 index = 0;
1152 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001153 if (index > self->extra->length)
1154 index = self->extra->length;
1155
1156 if (element_resize(self, 1) < 0)
1157 return NULL;
1158
1159 for (i = self->extra->length; i > index; i--)
1160 self->extra->children[i] = self->extra->children[i-1];
1161
1162 Py_INCREF(element);
1163 self->extra->children[index] = element;
1164
1165 self->extra->length++;
1166
1167 Py_RETURN_NONE;
1168}
1169
1170static PyObject*
1171element_items(ElementObject* self, PyObject* args)
1172{
1173 if (!PyArg_ParseTuple(args, ":items"))
1174 return NULL;
1175
1176 if (!self->extra || self->extra->attrib == Py_None)
1177 return PyList_New(0);
1178
1179 return PyDict_Items(self->extra->attrib);
1180}
1181
1182static PyObject*
1183element_keys(ElementObject* self, PyObject* args)
1184{
1185 if (!PyArg_ParseTuple(args, ":keys"))
1186 return NULL;
1187
1188 if (!self->extra || self->extra->attrib == Py_None)
1189 return PyList_New(0);
1190
1191 return PyDict_Keys(self->extra->attrib);
1192}
1193
Martin v. Löwis18e16552006-02-15 17:27:45 +00001194static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001195element_length(ElementObject* self)
1196{
1197 if (!self->extra)
1198 return 0;
1199
1200 return self->extra->length;
1201}
1202
1203static PyObject*
1204element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1205{
1206 PyObject* elem;
1207
1208 PyObject* tag;
1209 PyObject* attrib;
1210 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1211 return NULL;
1212
1213 attrib = PyDict_Copy(attrib);
1214 if (!attrib)
1215 return NULL;
1216
Eli Bendersky092af1f2012-03-04 07:14:03 +02001217 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001218
1219 Py_DECREF(attrib);
1220
1221 return elem;
1222}
1223
1224static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001225element_remove(ElementObject* self, PyObject* args)
1226{
1227 int i;
1228
1229 PyObject* element;
1230 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1231 return NULL;
1232
1233 if (!self->extra) {
1234 /* element has no children, so raise exception */
1235 PyErr_SetString(
1236 PyExc_ValueError,
1237 "list.remove(x): x not in list"
1238 );
1239 return NULL;
1240 }
1241
1242 for (i = 0; i < self->extra->length; i++) {
1243 if (self->extra->children[i] == element)
1244 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001245 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001246 break;
1247 }
1248
1249 if (i == self->extra->length) {
1250 /* element is not in children, so raise exception */
1251 PyErr_SetString(
1252 PyExc_ValueError,
1253 "list.remove(x): x not in list"
1254 );
1255 return NULL;
1256 }
1257
1258 Py_DECREF(self->extra->children[i]);
1259
1260 self->extra->length--;
1261
1262 for (; i < self->extra->length; i++)
1263 self->extra->children[i] = self->extra->children[i+1];
1264
1265 Py_RETURN_NONE;
1266}
1267
1268static PyObject*
1269element_repr(ElementObject* self)
1270{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001271 if (self->tag)
1272 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1273 else
1274 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001275}
1276
1277static PyObject*
1278element_set(ElementObject* self, PyObject* args)
1279{
1280 PyObject* attrib;
1281
1282 PyObject* key;
1283 PyObject* value;
1284 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1285 return NULL;
1286
1287 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001288 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001289
1290 attrib = element_get_attrib(self);
1291 if (!attrib)
1292 return NULL;
1293
1294 if (PyDict_SetItem(attrib, key, value) < 0)
1295 return NULL;
1296
1297 Py_RETURN_NONE;
1298}
1299
1300static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001301element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001302{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001303 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001304 int i;
1305 PyObject* old;
1306
1307 if (!self->extra || index < 0 || index >= self->extra->length) {
1308 PyErr_SetString(
1309 PyExc_IndexError,
1310 "child assignment index out of range");
1311 return -1;
1312 }
1313
1314 old = self->extra->children[index];
1315
1316 if (item) {
1317 Py_INCREF(item);
1318 self->extra->children[index] = item;
1319 } else {
1320 self->extra->length--;
1321 for (i = index; i < self->extra->length; i++)
1322 self->extra->children[i] = self->extra->children[i+1];
1323 }
1324
1325 Py_DECREF(old);
1326
1327 return 0;
1328}
1329
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001330static PyObject*
1331element_subscr(PyObject* self_, PyObject* item)
1332{
1333 ElementObject* self = (ElementObject*) self_;
1334
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001335 if (PyIndex_Check(item)) {
1336 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001337
1338 if (i == -1 && PyErr_Occurred()) {
1339 return NULL;
1340 }
1341 if (i < 0 && self->extra)
1342 i += self->extra->length;
1343 return element_getitem(self_, i);
1344 }
1345 else if (PySlice_Check(item)) {
1346 Py_ssize_t start, stop, step, slicelen, cur, i;
1347 PyObject* list;
1348
1349 if (!self->extra)
1350 return PyList_New(0);
1351
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001352 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001353 self->extra->length,
1354 &start, &stop, &step, &slicelen) < 0) {
1355 return NULL;
1356 }
1357
1358 if (slicelen <= 0)
1359 return PyList_New(0);
1360 else {
1361 list = PyList_New(slicelen);
1362 if (!list)
1363 return NULL;
1364
1365 for (cur = start, i = 0; i < slicelen;
1366 cur += step, i++) {
1367 PyObject* item = self->extra->children[cur];
1368 Py_INCREF(item);
1369 PyList_SET_ITEM(list, i, item);
1370 }
1371
1372 return list;
1373 }
1374 }
1375 else {
1376 PyErr_SetString(PyExc_TypeError,
1377 "element indices must be integers");
1378 return NULL;
1379 }
1380}
1381
1382static int
1383element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1384{
1385 ElementObject* self = (ElementObject*) self_;
1386
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001387 if (PyIndex_Check(item)) {
1388 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001389
1390 if (i == -1 && PyErr_Occurred()) {
1391 return -1;
1392 }
1393 if (i < 0 && self->extra)
1394 i += self->extra->length;
1395 return element_setitem(self_, i, value);
1396 }
1397 else if (PySlice_Check(item)) {
1398 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1399
1400 PyObject* recycle = NULL;
1401 PyObject* seq = NULL;
1402
1403 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001404 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001405
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001406 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001407 self->extra->length,
1408 &start, &stop, &step, &slicelen) < 0) {
1409 return -1;
1410 }
1411
Eli Bendersky865756a2012-03-09 13:38:15 +02001412 if (value == NULL) {
1413 /* Delete slice */
1414 size_t cur;
1415 Py_ssize_t i;
1416
1417 if (slicelen <= 0)
1418 return 0;
1419
1420 /* Since we're deleting, the direction of the range doesn't matter,
1421 * so for simplicity make it always ascending.
1422 */
1423 if (step < 0) {
1424 stop = start + 1;
1425 start = stop + step * (slicelen - 1) - 1;
1426 step = -step;
1427 }
1428
1429 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1430
1431 /* recycle is a list that will contain all the children
1432 * scheduled for removal.
1433 */
1434 if (!(recycle = PyList_New(slicelen))) {
1435 PyErr_NoMemory();
1436 return -1;
1437 }
1438
1439 /* This loop walks over all the children that have to be deleted,
1440 * with cur pointing at them. num_moved is the amount of children
1441 * until the next deleted child that have to be "shifted down" to
1442 * occupy the deleted's places.
1443 * Note that in the ith iteration, shifting is done i+i places down
1444 * because i children were already removed.
1445 */
1446 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1447 /* Compute how many children have to be moved, clipping at the
1448 * list end.
1449 */
1450 Py_ssize_t num_moved = step - 1;
1451 if (cur + step >= (size_t)self->extra->length) {
1452 num_moved = self->extra->length - cur - 1;
1453 }
1454
1455 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1456
1457 memmove(
1458 self->extra->children + cur - i,
1459 self->extra->children + cur + 1,
1460 num_moved * sizeof(PyObject *));
1461 }
1462
1463 /* Leftover "tail" after the last removed child */
1464 cur = start + (size_t)slicelen * step;
1465 if (cur < (size_t)self->extra->length) {
1466 memmove(
1467 self->extra->children + cur - slicelen,
1468 self->extra->children + cur,
1469 (self->extra->length - cur) * sizeof(PyObject *));
1470 }
1471
1472 self->extra->length -= slicelen;
1473
1474 /* Discard the recycle list with all the deleted sub-elements */
1475 Py_XDECREF(recycle);
1476 return 0;
1477 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001478 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001479 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001480 seq = PySequence_Fast(value, "");
1481 if (!seq) {
1482 PyErr_Format(
1483 PyExc_TypeError,
1484 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1485 );
1486 return -1;
1487 }
1488 newlen = PySequence_Size(seq);
1489 }
1490
1491 if (step != 1 && newlen != slicelen)
1492 {
1493 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001494 "attempt to assign sequence of size %zd "
1495 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001496 newlen, slicelen
1497 );
1498 return -1;
1499 }
1500
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001501 /* Resize before creating the recycle bin, to prevent refleaks. */
1502 if (newlen > slicelen) {
1503 if (element_resize(self, newlen - slicelen) < 0) {
1504 if (seq) {
1505 Py_DECREF(seq);
1506 }
1507 return -1;
1508 }
1509 }
1510
1511 if (slicelen > 0) {
1512 /* to avoid recursive calls to this method (via decref), move
1513 old items to the recycle bin here, and get rid of them when
1514 we're done modifying the element */
1515 recycle = PyList_New(slicelen);
1516 if (!recycle) {
1517 if (seq) {
1518 Py_DECREF(seq);
1519 }
1520 return -1;
1521 }
1522 for (cur = start, i = 0; i < slicelen;
1523 cur += step, i++)
1524 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1525 }
1526
1527 if (newlen < slicelen) {
1528 /* delete slice */
1529 for (i = stop; i < self->extra->length; i++)
1530 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1531 } else if (newlen > slicelen) {
1532 /* insert slice */
1533 for (i = self->extra->length-1; i >= stop; i--)
1534 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1535 }
1536
1537 /* replace the slice */
1538 for (cur = start, i = 0; i < newlen;
1539 cur += step, i++) {
1540 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1541 Py_INCREF(element);
1542 self->extra->children[cur] = element;
1543 }
1544
1545 self->extra->length += newlen - slicelen;
1546
1547 if (seq) {
1548 Py_DECREF(seq);
1549 }
1550
1551 /* discard the recycle bin, and everything in it */
1552 Py_XDECREF(recycle);
1553
1554 return 0;
1555 }
1556 else {
1557 PyErr_SetString(PyExc_TypeError,
1558 "element indices must be integers");
1559 return -1;
1560 }
1561}
1562
/* Method table for Element objects.  Every entry is registered with
 * METH_VARARGS; "getiterator" is bound to the same C function as "iter".
 * The table is terminated by a {NULL, NULL} sentinel. */
static PyMethodDef element_methods[] = {

    {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},

    /* attribute access */
    {"get", (PyCFunction) element_get, METH_VARARGS},
    {"set", (PyCFunction) element_set, METH_VARARGS},

    /* searching */
    {"find", (PyCFunction) element_find, METH_VARARGS},
    {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
    {"findall", (PyCFunction) element_findall, METH_VARARGS},

    /* child list manipulation */
    {"append", (PyCFunction) element_append, METH_VARARGS},
    {"extend", (PyCFunction) element_extend, METH_VARARGS},
    {"insert", (PyCFunction) element_insert, METH_VARARGS},
    {"remove", (PyCFunction) element_remove, METH_VARARGS},

    /* iteration */
    {"iter", (PyCFunction) element_iter, METH_VARARGS},
    {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
    {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},

    {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
    {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},

    {"items", (PyCFunction) element_items, METH_VARARGS},
    {"keys", (PyCFunction) element_keys, METH_VARARGS},

    {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},

    /* copy protocol */
    {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
    {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},

    {NULL, NULL}
};
1596
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001597static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001598element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001599{
1600 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001601 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001602
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001603 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001604 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001605
Alexander Belopolskye239d232010-12-08 23:31:48 +00001606 if (name == NULL)
1607 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001608
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001609 /* handle common attributes first */
1610 if (strcmp(name, "tag") == 0) {
1611 res = self->tag;
1612 Py_INCREF(res);
1613 return res;
1614 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001615 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001616 Py_INCREF(res);
1617 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001618 }
1619
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001620 /* methods */
1621 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1622 if (res)
1623 return res;
1624
1625 /* less common attributes */
1626 if (strcmp(name, "tail") == 0) {
1627 PyErr_Clear();
1628 res = element_get_tail(self);
1629 } else if (strcmp(name, "attrib") == 0) {
1630 PyErr_Clear();
1631 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001632 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001633 res = element_get_attrib(self);
1634 }
1635
1636 if (!res)
1637 return NULL;
1638
1639 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001640 return res;
1641}
1642
1643static int
1644element_setattr(ElementObject* self, const char* name, PyObject* value)
1645{
1646 if (value == NULL) {
1647 PyErr_SetString(
1648 PyExc_AttributeError,
1649 "can't delete element attributes"
1650 );
1651 return -1;
1652 }
1653
1654 if (strcmp(name, "tag") == 0) {
1655 Py_DECREF(self->tag);
1656 self->tag = value;
1657 Py_INCREF(self->tag);
1658 } else if (strcmp(name, "text") == 0) {
1659 Py_DECREF(JOIN_OBJ(self->text));
1660 self->text = value;
1661 Py_INCREF(self->text);
1662 } else if (strcmp(name, "tail") == 0) {
1663 Py_DECREF(JOIN_OBJ(self->tail));
1664 self->tail = value;
1665 Py_INCREF(self->tail);
1666 } else if (strcmp(name, "attrib") == 0) {
1667 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001668 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001669 Py_DECREF(self->extra->attrib);
1670 self->extra->attrib = value;
1671 Py_INCREF(self->extra->attrib);
1672 } else {
1673 PyErr_SetString(PyExc_AttributeError, name);
1674 return -1;
1675 }
1676
1677 return 0;
1678}
1679
/* Sequence protocol slots for Element objects.  The initializer is
 * positional, so the order of the entries must not change. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem, /* sq_item */
    0, /* unused slot (was sq_slice) */
    element_setitem, /* sq_ass_item */
    0, /* unused slot (was sq_ass_slice) */
};
1689
/* Mapping protocol slots for Element objects; these provide index and
 * slice access through element_subscr / element_ass_subscr. */
static PyMappingMethods element_as_mapping = {
    (lenfunc) element_length, /* mp_length */
    (binaryfunc) element_subscr, /* mp_subscript */
    (objobjargproc) element_ass_subscr, /* mp_ass_subscript */
};
1695
/* Type object for Element.  The initializer is positional: every entry
 * must stay in its slot.  The type is subclassable (Py_TPFLAGS_BASETYPE),
 * takes part in cyclic garbage collection (Py_TPFLAGS_HAVE_GC together
 * with tp_traverse / tp_clear) and supports weak references through the
 * weakreflist member of ElementObject. */
static PyTypeObject Element_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "Element", sizeof(ElementObject), 0, /* tp_name, tp_basicsize, tp_itemsize */
    /* methods */
    (destructor)element_dealloc,                    /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    (setattrfunc)element_setattr,                   /* tp_setattr */
    0,                                              /* tp_reserved */
    (reprfunc)element_repr,                         /* tp_repr */
    0,                                              /* tp_as_number */
    &element_as_sequence,                           /* tp_as_sequence */
    &element_as_mapping,                            /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    (getattrofunc)element_getattro,                 /* tp_getattro */
    0,                                              /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)element_gc_traverse,              /* tp_traverse */
    (inquiry)element_gc_clear,                      /* tp_clear */
    0,                                              /* tp_richcompare */
    offsetof(ElementObject, weakreflist),           /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    element_methods,                                /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    (initproc)element_init,                         /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    element_new,                                    /* tp_new */
    0,                                              /* tp_free */
};
1737
1738/* ==================================================================== */
1739/* the tree builder type */
1740
/* State of a tree builder: the tree under construction, the stack of
 * currently open elements, pending character data, and the optional event
 * recording configuration. */
typedef struct {
    PyObject_HEAD

    PyObject* root; /* root node (first created node) */

    ElementObject* this; /* current node */
    ElementObject* last; /* most recently created node */

    PyObject* data; /* data collector (string or list), or NULL */

    PyObject* stack; /* element stack */
    Py_ssize_t index; /* current stack size (0=empty) */

    /* element tracing */
    PyObject* events; /* list of events, or NULL if not collecting */
    PyObject* start_event_obj; /* event objects (NULL to ignore) */
    PyObject* end_event_obj;
    PyObject* start_ns_event_obj;
    PyObject* end_ns_event_obj;

} TreeBuilderObject;
1762
Neal Norwitz227b5332006-03-22 09:28:35 +00001763static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001764
Christian Heimes90aa7642007-12-19 02:45:37 +00001765#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001766
1767/* -------------------------------------------------------------------- */
1768/* constructor and destructor */
1769
1770LOCAL(PyObject*)
1771treebuilder_new(void)
1772{
1773 TreeBuilderObject* self;
1774
1775 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1776 if (self == NULL)
1777 return NULL;
1778
1779 self->root = NULL;
1780
1781 Py_INCREF(Py_None);
1782 self->this = (ElementObject*) Py_None;
1783
1784 Py_INCREF(Py_None);
1785 self->last = (ElementObject*) Py_None;
1786
1787 self->data = NULL;
1788
1789 self->stack = PyList_New(20);
1790 self->index = 0;
1791
1792 self->events = NULL;
1793 self->start_event_obj = self->end_event_obj = NULL;
1794 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1795
1796 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1797
1798 return (PyObject*) self;
1799}
1800
1801static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001802treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001803{
1804 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1805 return NULL;
1806
1807 return treebuilder_new();
1808}
1809
/* Destructor: releases every reference held by the builder and frees the
 * object.  Members that may be NULL are released with Py_XDECREF; stack,
 * last and this are released with Py_DECREF and so must be non-NULL. */
static void
treebuilder_dealloc(TreeBuilderObject* self)
{
    /* event recording state */
    Py_XDECREF(self->end_ns_event_obj);
    Py_XDECREF(self->start_ns_event_obj);
    Py_XDECREF(self->end_event_obj);
    Py_XDECREF(self->start_event_obj);
    Py_XDECREF(self->events);
    /* tree construction state */
    Py_DECREF(self->stack);
    Py_XDECREF(self->data);
    Py_DECREF(self->last);
    Py_DECREF(self->this);
    Py_XDECREF(self->root);

    RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");

    PyObject_Del(self);
}
1828
1829/* -------------------------------------------------------------------- */
1830/* handlers */
1831
1832LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001833treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1834 PyObject* attrib)
1835{
1836 PyObject* node;
1837 PyObject* this;
1838
1839 if (self->data) {
1840 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001841 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001842 self->last->text = JOIN_SET(
1843 self->data, PyList_CheckExact(self->data)
1844 );
1845 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001846 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001847 self->last->tail = JOIN_SET(
1848 self->data, PyList_CheckExact(self->data)
1849 );
1850 }
1851 self->data = NULL;
1852 }
1853
Eli Bendersky092af1f2012-03-04 07:14:03 +02001854 node = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001855 if (!node)
1856 return NULL;
1857
1858 this = (PyObject*) self->this;
1859
1860 if (this != Py_None) {
1861 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001862 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001863 } else {
1864 if (self->root) {
1865 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001866 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001867 "multiple elements on top level"
1868 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001869 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001870 }
1871 Py_INCREF(node);
1872 self->root = node;
1873 }
1874
1875 if (self->index < PyList_GET_SIZE(self->stack)) {
1876 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001877 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001878 Py_INCREF(this);
1879 } else {
1880 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001881 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001882 }
1883 self->index++;
1884
1885 Py_DECREF(this);
1886 Py_INCREF(node);
1887 self->this = (ElementObject*) node;
1888
1889 Py_DECREF(self->last);
1890 Py_INCREF(node);
1891 self->last = (ElementObject*) node;
1892
1893 if (self->start_event_obj) {
1894 PyObject* res;
1895 PyObject* action = self->start_event_obj;
1896 res = PyTuple_New(2);
1897 if (res) {
1898 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1899 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1900 PyList_Append(self->events, res);
1901 Py_DECREF(res);
1902 } else
1903 PyErr_Clear(); /* FIXME: propagate error */
1904 }
1905
1906 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001907
1908 error:
1909 Py_DECREF(node);
1910 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001911}
1912
1913LOCAL(PyObject*)
1914treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1915{
1916 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001917 if (self->last == (ElementObject*) Py_None) {
1918 /* ignore calls to data before the first call to start */
1919 Py_RETURN_NONE;
1920 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001921 /* store the first item as is */
1922 Py_INCREF(data); self->data = data;
1923 } else {
1924 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001925 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1926 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001927 /* expat often generates single character data sections; handle
1928 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001929 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1930 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001931 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001932 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001933 } else if (PyList_CheckExact(self->data)) {
1934 if (PyList_Append(self->data, data) < 0)
1935 return NULL;
1936 } else {
1937 PyObject* list = PyList_New(2);
1938 if (!list)
1939 return NULL;
1940 PyList_SET_ITEM(list, 0, self->data);
1941 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1942 self->data = list;
1943 }
1944 }
1945
1946 Py_RETURN_NONE;
1947}
1948
1949LOCAL(PyObject*)
1950treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1951{
1952 PyObject* item;
1953
1954 if (self->data) {
1955 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001956 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001957 self->last->text = JOIN_SET(
1958 self->data, PyList_CheckExact(self->data)
1959 );
1960 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001961 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001962 self->last->tail = JOIN_SET(
1963 self->data, PyList_CheckExact(self->data)
1964 );
1965 }
1966 self->data = NULL;
1967 }
1968
1969 if (self->index == 0) {
1970 PyErr_SetString(
1971 PyExc_IndexError,
1972 "pop from empty stack"
1973 );
1974 return NULL;
1975 }
1976
1977 self->index--;
1978
1979 item = PyList_GET_ITEM(self->stack, self->index);
1980 Py_INCREF(item);
1981
1982 Py_DECREF(self->last);
1983
1984 self->last = (ElementObject*) self->this;
1985 self->this = (ElementObject*) item;
1986
1987 if (self->end_event_obj) {
1988 PyObject* res;
1989 PyObject* action = self->end_event_obj;
1990 PyObject* node = (PyObject*) self->last;
1991 res = PyTuple_New(2);
1992 if (res) {
1993 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1994 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1995 PyList_Append(self->events, res);
1996 Py_DECREF(res);
1997 } else
1998 PyErr_Clear(); /* FIXME: propagate error */
1999 }
2000
2001 Py_INCREF(self->last);
2002 return (PyObject*) self->last;
2003}
2004
2005LOCAL(void)
2006treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002007 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002008{
2009 PyObject* res;
2010 PyObject* action;
2011 PyObject* parcel;
2012
2013 if (!self->events)
2014 return;
2015
2016 if (start) {
2017 if (!self->start_ns_event_obj)
2018 return;
2019 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002020 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002021 if (!parcel)
2022 return;
2023 Py_INCREF(action);
2024 } else {
2025 if (!self->end_ns_event_obj)
2026 return;
2027 action = self->end_ns_event_obj;
2028 Py_INCREF(action);
2029 parcel = Py_None;
2030 Py_INCREF(parcel);
2031 }
2032
2033 res = PyTuple_New(2);
2034
2035 if (res) {
2036 PyTuple_SET_ITEM(res, 0, action);
2037 PyTuple_SET_ITEM(res, 1, parcel);
2038 PyList_Append(self->events, res);
2039 Py_DECREF(res);
2040 } else
2041 PyErr_Clear(); /* FIXME: propagate error */
2042}
2043
2044/* -------------------------------------------------------------------- */
2045/* methods (in alphabetical order) */
2046
2047static PyObject*
2048treebuilder_data(TreeBuilderObject* self, PyObject* args)
2049{
2050 PyObject* data;
2051 if (!PyArg_ParseTuple(args, "O:data", &data))
2052 return NULL;
2053
2054 return treebuilder_handle_data(self, data);
2055}
2056
2057static PyObject*
2058treebuilder_end(TreeBuilderObject* self, PyObject* args)
2059{
2060 PyObject* tag;
2061 if (!PyArg_ParseTuple(args, "O:end", &tag))
2062 return NULL;
2063
2064 return treebuilder_handle_end(self, tag);
2065}
2066
2067LOCAL(PyObject*)
2068treebuilder_done(TreeBuilderObject* self)
2069{
2070 PyObject* res;
2071
2072 /* FIXME: check stack size? */
2073
2074 if (self->root)
2075 res = self->root;
2076 else
2077 res = Py_None;
2078
2079 Py_INCREF(res);
2080 return res;
2081}
2082
2083static PyObject*
2084treebuilder_close(TreeBuilderObject* self, PyObject* args)
2085{
2086 if (!PyArg_ParseTuple(args, ":close"))
2087 return NULL;
2088
2089 return treebuilder_done(self);
2090}
2091
2092static PyObject*
2093treebuilder_start(TreeBuilderObject* self, PyObject* args)
2094{
2095 PyObject* tag;
2096 PyObject* attrib = Py_None;
2097 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2098 return NULL;
2099
2100 return treebuilder_handle_start(self, tag, attrib);
2101}
2102
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002103static PyMethodDef treebuilder_methods[] = {
2104 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2105 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2106 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002107 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2108 {NULL, NULL}
2109};
2110
Neal Norwitz227b5332006-03-22 09:28:35 +00002111static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002112 PyVarObject_HEAD_INIT(NULL, 0)
2113 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002114 /* methods */
2115 (destructor)treebuilder_dealloc, /* tp_dealloc */
2116 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002117 0, /* tp_getattr */
2118 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002119 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002120 0, /* tp_repr */
2121 0, /* tp_as_number */
2122 0, /* tp_as_sequence */
2123 0, /* tp_as_mapping */
2124 0, /* tp_hash */
2125 0, /* tp_call */
2126 0, /* tp_str */
2127 0, /* tp_getattro */
2128 0, /* tp_setattro */
2129 0, /* tp_as_buffer */
2130 Py_TPFLAGS_DEFAULT, /* tp_flags */
2131 0, /* tp_doc */
2132 0, /* tp_traverse */
2133 0, /* tp_clear */
2134 0, /* tp_richcompare */
2135 0, /* tp_weaklistoffset */
2136 0, /* tp_iter */
2137 0, /* tp_iternext */
2138 treebuilder_methods, /* tp_methods */
2139 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002140};
2141
2142/* ==================================================================== */
2143/* the expat interface */
2144
2145#if defined(USE_EXPAT)
2146
2147#include "expat.h"
2148
2149#if defined(USE_PYEXPAT_CAPI)
2150#include "pyexpat.h"
2151static struct PyExpat_CAPI* expat_capi;
2152#define EXPAT(func) (expat_capi->func)
2153#else
2154#define EXPAT(func) (XML_##func)
2155#endif
2156
2157typedef struct {
2158 PyObject_HEAD
2159
2160 XML_Parser parser;
2161
2162 PyObject* target;
2163 PyObject* entity;
2164
2165 PyObject* names;
2166
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002167 PyObject* handle_start;
2168 PyObject* handle_data;
2169 PyObject* handle_end;
2170
2171 PyObject* handle_comment;
2172 PyObject* handle_pi;
2173
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002174 PyObject* handle_close;
2175
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002176} XMLParserObject;
2177
Neal Norwitz227b5332006-03-22 09:28:35 +00002178static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002179
2180/* helpers */
2181
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002182LOCAL(PyObject*)
2183makeuniversal(XMLParserObject* self, const char* string)
2184{
2185 /* convert a UTF-8 tag/attribute name from the expat parser
2186 to a universal name string */
2187
2188 int size = strlen(string);
2189 PyObject* key;
2190 PyObject* value;
2191
2192 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002193 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002194 if (!key)
2195 return NULL;
2196
2197 value = PyDict_GetItem(self->names, key);
2198
2199 if (value) {
2200 Py_INCREF(value);
2201 } else {
2202 /* new name. convert to universal name, and decode as
2203 necessary */
2204
2205 PyObject* tag;
2206 char* p;
2207 int i;
2208
2209 /* look for namespace separator */
2210 for (i = 0; i < size; i++)
2211 if (string[i] == '}')
2212 break;
2213 if (i != size) {
2214 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002215 tag = PyBytes_FromStringAndSize(NULL, size+1);
2216 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002217 p[0] = '{';
2218 memcpy(p+1, string, size);
2219 size++;
2220 } else {
2221 /* plain name; use key as tag */
2222 Py_INCREF(key);
2223 tag = key;
2224 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002225
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002226 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002227 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002228 value = PyUnicode_DecodeUTF8(p, size, "strict");
2229 Py_DECREF(tag);
2230 if (!value) {
2231 Py_DECREF(key);
2232 return NULL;
2233 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002234
2235 /* add to names dictionary */
2236 if (PyDict_SetItem(self->names, key, value) < 0) {
2237 Py_DECREF(key);
2238 Py_DECREF(value);
2239 return NULL;
2240 }
2241 }
2242
2243 Py_DECREF(key);
2244 return value;
2245}
2246
Eli Bendersky5b77d812012-03-16 08:20:05 +02002247/* Set the ParseError exception with the given parameters.
2248 * If message is not NULL, it's used as the error string. Otherwise, the
2249 * message string is the default for the given error_code.
2250*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002251static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002252expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002253{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002254 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002255
Victor Stinner499dfcf2011-03-21 13:26:24 +01002256 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002257 message ? message : EXPAT(ErrorString)(error_code),
2258 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002259 if (errmsg == NULL)
2260 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002261
Victor Stinner499dfcf2011-03-21 13:26:24 +01002262 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2263 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002264 if (!error)
2265 return;
2266
Eli Bendersky5b77d812012-03-16 08:20:05 +02002267 /* Add code and position attributes */
2268 code = PyLong_FromLong((long)error_code);
2269 if (!code) {
2270 Py_DECREF(error);
2271 return;
2272 }
2273 if (PyObject_SetAttrString(error, "code", code) == -1) {
2274 Py_DECREF(error);
2275 Py_DECREF(code);
2276 return;
2277 }
2278 Py_DECREF(code);
2279
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002280 position = Py_BuildValue("(ii)", line, column);
2281 if (!position) {
2282 Py_DECREF(error);
2283 return;
2284 }
2285 if (PyObject_SetAttrString(error, "position", position) == -1) {
2286 Py_DECREF(error);
2287 Py_DECREF(position);
2288 return;
2289 }
2290 Py_DECREF(position);
2291
2292 PyErr_SetObject(elementtree_parseerror_obj, error);
2293 Py_DECREF(error);
2294}
2295
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002296/* -------------------------------------------------------------------- */
2297/* handlers */
2298
2299static void
2300expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2301 int data_len)
2302{
2303 PyObject* key;
2304 PyObject* value;
2305 PyObject* res;
2306
2307 if (data_len < 2 || data_in[0] != '&')
2308 return;
2309
Neal Norwitz0269b912007-08-08 06:56:02 +00002310 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002311 if (!key)
2312 return;
2313
2314 value = PyDict_GetItem(self->entity, key);
2315
2316 if (value) {
2317 if (TreeBuilder_CheckExact(self->target))
2318 res = treebuilder_handle_data(
2319 (TreeBuilderObject*) self->target, value
2320 );
2321 else if (self->handle_data)
2322 res = PyObject_CallFunction(self->handle_data, "O", value);
2323 else
2324 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002325 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002326 } else if (!PyErr_Occurred()) {
2327 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002328 char message[128] = "undefined entity ";
2329 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002330 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002331 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002332 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002333 EXPAT(GetErrorColumnNumber)(self->parser),
2334 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002335 );
2336 }
2337
2338 Py_DECREF(key);
2339}
2340
2341static void
2342expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2343 const XML_Char **attrib_in)
2344{
2345 PyObject* res;
2346 PyObject* tag;
2347 PyObject* attrib;
2348 int ok;
2349
2350 /* tag name */
2351 tag = makeuniversal(self, tag_in);
2352 if (!tag)
2353 return; /* parser will look for errors */
2354
2355 /* attributes */
2356 if (attrib_in[0]) {
2357 attrib = PyDict_New();
2358 if (!attrib)
2359 return;
2360 while (attrib_in[0] && attrib_in[1]) {
2361 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002362 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002363 if (!key || !value) {
2364 Py_XDECREF(value);
2365 Py_XDECREF(key);
2366 Py_DECREF(attrib);
2367 return;
2368 }
2369 ok = PyDict_SetItem(attrib, key, value);
2370 Py_DECREF(value);
2371 Py_DECREF(key);
2372 if (ok < 0) {
2373 Py_DECREF(attrib);
2374 return;
2375 }
2376 attrib_in += 2;
2377 }
2378 } else {
2379 Py_INCREF(Py_None);
2380 attrib = Py_None;
2381 }
2382
2383 if (TreeBuilder_CheckExact(self->target))
2384 /* shortcut */
2385 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2386 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002387 else if (self->handle_start) {
2388 if (attrib == Py_None) {
2389 Py_DECREF(attrib);
2390 attrib = PyDict_New();
2391 if (!attrib)
2392 return;
2393 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002394 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002395 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002396 res = NULL;
2397
2398 Py_DECREF(tag);
2399 Py_DECREF(attrib);
2400
2401 Py_XDECREF(res);
2402}
2403
2404static void
2405expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2406 int data_len)
2407{
2408 PyObject* data;
2409 PyObject* res;
2410
Neal Norwitz0269b912007-08-08 06:56:02 +00002411 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002412 if (!data)
2413 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002414
2415 if (TreeBuilder_CheckExact(self->target))
2416 /* shortcut */
2417 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2418 else if (self->handle_data)
2419 res = PyObject_CallFunction(self->handle_data, "O", data);
2420 else
2421 res = NULL;
2422
2423 Py_DECREF(data);
2424
2425 Py_XDECREF(res);
2426}
2427
2428static void
2429expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2430{
2431 PyObject* tag;
2432 PyObject* res = NULL;
2433
2434 if (TreeBuilder_CheckExact(self->target))
2435 /* shortcut */
2436 /* the standard tree builder doesn't look at the end tag */
2437 res = treebuilder_handle_end(
2438 (TreeBuilderObject*) self->target, Py_None
2439 );
2440 else if (self->handle_end) {
2441 tag = makeuniversal(self, tag_in);
2442 if (tag) {
2443 res = PyObject_CallFunction(self->handle_end, "O", tag);
2444 Py_DECREF(tag);
2445 }
2446 }
2447
2448 Py_XDECREF(res);
2449}
2450
2451static void
2452expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2453 const XML_Char *uri)
2454{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002455 PyObject* sprefix = NULL;
2456 PyObject* suri = NULL;
2457
2458 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2459 if (!suri)
2460 return;
2461
2462 if (prefix)
2463 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2464 else
2465 sprefix = PyUnicode_FromString("");
2466 if (!sprefix) {
2467 Py_DECREF(suri);
2468 return;
2469 }
2470
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002471 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002472 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002473 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002474
2475 Py_DECREF(sprefix);
2476 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002477}
2478
2479static void
2480expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2481{
2482 treebuilder_handle_namespace(
2483 (TreeBuilderObject*) self->target, 0, NULL, NULL
2484 );
2485}
2486
2487static void
2488expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2489{
2490 PyObject* comment;
2491 PyObject* res;
2492
2493 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002494 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002495 if (comment) {
2496 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2497 Py_XDECREF(res);
2498 Py_DECREF(comment);
2499 }
2500 }
2501}
2502
2503static void
2504expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2505 const XML_Char* data_in)
2506{
2507 PyObject* target;
2508 PyObject* data;
2509 PyObject* res;
2510
2511 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002512 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2513 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002514 if (target && data) {
2515 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2516 Py_XDECREF(res);
2517 Py_DECREF(data);
2518 Py_DECREF(target);
2519 } else {
2520 Py_XDECREF(data);
2521 Py_XDECREF(target);
2522 }
2523 }
2524}
2525
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002526static int
2527expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2528 XML_Encoding *info)
2529{
2530 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002531 unsigned char s[256];
2532 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002533 void *data;
2534 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002535
2536 memset(info, 0, sizeof(XML_Encoding));
2537
2538 for (i = 0; i < 256; i++)
2539 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002540
Fredrik Lundhc3389992005-12-25 11:40:19 +00002541 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002542 if (!u)
2543 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002544 if (PyUnicode_READY(u))
2545 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002546
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002547 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002548 Py_DECREF(u);
2549 return XML_STATUS_ERROR;
2550 }
2551
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002552 kind = PyUnicode_KIND(u);
2553 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002554 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002555 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2556 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2557 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002558 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002559 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002560 }
2561
2562 Py_DECREF(u);
2563
2564 return XML_STATUS_OK;
2565}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002566
2567/* -------------------------------------------------------------------- */
2568/* constructor and destructor */
2569
2570static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002571xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002572{
2573 XMLParserObject* self;
2574 /* FIXME: does this need to be static? */
2575 static XML_Memory_Handling_Suite memory_handler;
2576
2577 PyObject* target = NULL;
2578 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002579 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002580 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2581 &target, &encoding))
2582 return NULL;
2583
2584#if defined(USE_PYEXPAT_CAPI)
2585 if (!expat_capi) {
2586 PyErr_SetString(
2587 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2588 );
2589 return NULL;
2590 }
2591#endif
2592
2593 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2594 if (self == NULL)
2595 return NULL;
2596
2597 self->entity = PyDict_New();
2598 if (!self->entity) {
2599 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002600 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002601 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002602
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002603 self->names = PyDict_New();
2604 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002605 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002606 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002607 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002608 }
2609
2610 memory_handler.malloc_fcn = PyObject_Malloc;
2611 memory_handler.realloc_fcn = PyObject_Realloc;
2612 memory_handler.free_fcn = PyObject_Free;
2613
2614 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2615 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002616 PyObject_Del(self->names);
2617 PyObject_Del(self->entity);
2618 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002619 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002620 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002621 }
2622
2623 /* setup target handlers */
2624 if (!target) {
2625 target = treebuilder_new();
2626 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002627 EXPAT(ParserFree)(self->parser);
2628 PyObject_Del(self->names);
2629 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002630 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002631 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002632 }
2633 } else
2634 Py_INCREF(target);
2635 self->target = target;
2636
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002637 self->handle_start = PyObject_GetAttrString(target, "start");
2638 self->handle_data = PyObject_GetAttrString(target, "data");
2639 self->handle_end = PyObject_GetAttrString(target, "end");
2640 self->handle_comment = PyObject_GetAttrString(target, "comment");
2641 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002642 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002643
2644 PyErr_Clear();
2645
2646 /* configure parser */
2647 EXPAT(SetUserData)(self->parser, self);
2648 EXPAT(SetElementHandler)(
2649 self->parser,
2650 (XML_StartElementHandler) expat_start_handler,
2651 (XML_EndElementHandler) expat_end_handler
2652 );
2653 EXPAT(SetDefaultHandlerExpand)(
2654 self->parser,
2655 (XML_DefaultHandler) expat_default_handler
2656 );
2657 EXPAT(SetCharacterDataHandler)(
2658 self->parser,
2659 (XML_CharacterDataHandler) expat_data_handler
2660 );
2661 if (self->handle_comment)
2662 EXPAT(SetCommentHandler)(
2663 self->parser,
2664 (XML_CommentHandler) expat_comment_handler
2665 );
2666 if (self->handle_pi)
2667 EXPAT(SetProcessingInstructionHandler)(
2668 self->parser,
2669 (XML_ProcessingInstructionHandler) expat_pi_handler
2670 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002671 EXPAT(SetUnknownEncodingHandler)(
2672 self->parser,
2673 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2674 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002675
2676 ALLOC(sizeof(XMLParserObject), "create expatparser");
2677
2678 return (PyObject*) self;
2679}
2680
2681static void
2682xmlparser_dealloc(XMLParserObject* self)
2683{
2684 EXPAT(ParserFree)(self->parser);
2685
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002686 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002687 Py_XDECREF(self->handle_pi);
2688 Py_XDECREF(self->handle_comment);
2689 Py_XDECREF(self->handle_end);
2690 Py_XDECREF(self->handle_data);
2691 Py_XDECREF(self->handle_start);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002692
2693 Py_DECREF(self->target);
2694 Py_DECREF(self->entity);
2695 Py_DECREF(self->names);
2696
2697 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2698
2699 PyObject_Del(self);
2700}
2701
2702/* -------------------------------------------------------------------- */
2703/* methods (in alphabetical order) */
2704
2705LOCAL(PyObject*)
2706expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2707{
2708 int ok;
2709
2710 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2711
2712 if (PyErr_Occurred())
2713 return NULL;
2714
2715 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002716 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002717 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002719 EXPAT(GetErrorColumnNumber)(self->parser),
2720 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002721 );
2722 return NULL;
2723 }
2724
2725 Py_RETURN_NONE;
2726}
2727
2728static PyObject*
2729xmlparser_close(XMLParserObject* self, PyObject* args)
2730{
2731 /* end feeding data to parser */
2732
2733 PyObject* res;
2734 if (!PyArg_ParseTuple(args, ":close"))
2735 return NULL;
2736
2737 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002738 if (!res)
2739 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002740
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002741 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742 Py_DECREF(res);
2743 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002744 } if (self->handle_close) {
2745 Py_DECREF(res);
2746 return PyObject_CallFunction(self->handle_close, "");
2747 } else
2748 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002749}
2750
2751static PyObject*
2752xmlparser_feed(XMLParserObject* self, PyObject* args)
2753{
2754 /* feed data to parser */
2755
2756 char* data;
2757 int data_len;
2758 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2759 return NULL;
2760
2761 return expat_parse(self, data, data_len, 0);
2762}
2763
2764static PyObject*
2765xmlparser_parse(XMLParserObject* self, PyObject* args)
2766{
2767 /* (internal) parse until end of input stream */
2768
2769 PyObject* reader;
2770 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02002771 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002772 PyObject* res;
2773
2774 PyObject* fileobj;
2775 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2776 return NULL;
2777
2778 reader = PyObject_GetAttrString(fileobj, "read");
2779 if (!reader)
2780 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002781
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002782 /* read from open file object */
2783 for (;;) {
2784
2785 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2786
2787 if (!buffer) {
2788 /* read failed (e.g. due to KeyboardInterrupt) */
2789 Py_DECREF(reader);
2790 return NULL;
2791 }
2792
Eli Benderskyf996e772012-03-16 05:53:30 +02002793 if (PyUnicode_CheckExact(buffer)) {
2794 /* A unicode object is encoded into bytes using UTF-8 */
2795 if (PyUnicode_GET_SIZE(buffer) == 0) {
2796 Py_DECREF(buffer);
2797 break;
2798 }
2799 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
2800 if (!temp) {
2801 /* Propagate exception from PyUnicode_AsEncodedString */
2802 Py_DECREF(buffer);
2803 Py_DECREF(reader);
2804 return NULL;
2805 }
2806
2807 /* Here we no longer need the original buffer since it contains
2808 * unicode. Make it point to the encoded bytes object.
2809 */
2810 Py_DECREF(buffer);
2811 buffer = temp;
2812 }
2813 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002814 Py_DECREF(buffer);
2815 break;
2816 }
2817
2818 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002819 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002820 );
2821
2822 Py_DECREF(buffer);
2823
2824 if (!res) {
2825 Py_DECREF(reader);
2826 return NULL;
2827 }
2828 Py_DECREF(res);
2829
2830 }
2831
2832 Py_DECREF(reader);
2833
2834 res = expat_parse(self, "", 0, 1);
2835
2836 if (res && TreeBuilder_CheckExact(self->target)) {
2837 Py_DECREF(res);
2838 return treebuilder_done((TreeBuilderObject*) self->target);
2839 }
2840
2841 return res;
2842}
2843
2844static PyObject*
2845xmlparser_setevents(XMLParserObject* self, PyObject* args)
2846{
2847 /* activate element event reporting */
2848
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002849 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002850 TreeBuilderObject* target;
2851
2852 PyObject* events; /* event collector */
2853 PyObject* event_set = Py_None;
2854 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2855 &event_set))
2856 return NULL;
2857
2858 if (!TreeBuilder_CheckExact(self->target)) {
2859 PyErr_SetString(
2860 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01002861 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002862 "targets"
2863 );
2864 return NULL;
2865 }
2866
2867 target = (TreeBuilderObject*) self->target;
2868
2869 Py_INCREF(events);
2870 Py_XDECREF(target->events);
2871 target->events = events;
2872
2873 /* clear out existing events */
2874 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2875 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2876 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2877 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2878
2879 if (event_set == Py_None) {
2880 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002881 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002882 Py_RETURN_NONE;
2883 }
2884
2885 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2886 goto error;
2887
2888 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2889 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2890 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002891 if (PyUnicode_Check(item)) {
2892 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002893 if (event == NULL)
2894 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002895 } else if (PyBytes_Check(item))
2896 event = PyBytes_AS_STRING(item);
2897 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002898 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002899 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002900 if (strcmp(event, "start") == 0) {
2901 Py_INCREF(item);
2902 target->start_event_obj = item;
2903 } else if (strcmp(event, "end") == 0) {
2904 Py_INCREF(item);
2905 Py_XDECREF(target->end_event_obj);
2906 target->end_event_obj = item;
2907 } else if (strcmp(event, "start-ns") == 0) {
2908 Py_INCREF(item);
2909 Py_XDECREF(target->start_ns_event_obj);
2910 target->start_ns_event_obj = item;
2911 EXPAT(SetNamespaceDeclHandler)(
2912 self->parser,
2913 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2914 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2915 );
2916 } else if (strcmp(event, "end-ns") == 0) {
2917 Py_INCREF(item);
2918 Py_XDECREF(target->end_ns_event_obj);
2919 target->end_ns_event_obj = item;
2920 EXPAT(SetNamespaceDeclHandler)(
2921 self->parser,
2922 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2923 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2924 );
2925 } else {
2926 PyErr_Format(
2927 PyExc_ValueError,
2928 "unknown event '%s'", event
2929 );
2930 return NULL;
2931 }
2932 }
2933
2934 Py_RETURN_NONE;
2935
2936 error:
2937 PyErr_SetString(
2938 PyExc_TypeError,
2939 "invalid event tuple"
2940 );
2941 return NULL;
2942}
2943
2944static PyMethodDef xmlparser_methods[] = {
2945 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2946 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2947 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2948 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2949 {NULL, NULL}
2950};
2951
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002952static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002953xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002954{
Alexander Belopolskye239d232010-12-08 23:31:48 +00002955 if (PyUnicode_Check(nameobj)) {
2956 PyObject* res;
2957 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
2958 res = self->entity;
2959 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
2960 res = self->target;
2961 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
2962 return PyUnicode_FromFormat(
2963 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002964 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00002965 }
2966 else
2967 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002968
Alexander Belopolskye239d232010-12-08 23:31:48 +00002969 Py_INCREF(res);
2970 return res;
2971 }
2972 generic:
2973 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974}
2975
Neal Norwitz227b5332006-03-22 09:28:35 +00002976static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002977 PyVarObject_HEAD_INIT(NULL, 0)
2978 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002979 /* methods */
2980 (destructor)xmlparser_dealloc, /* tp_dealloc */
2981 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002982 0, /* tp_getattr */
2983 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002984 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002985 0, /* tp_repr */
2986 0, /* tp_as_number */
2987 0, /* tp_as_sequence */
2988 0, /* tp_as_mapping */
2989 0, /* tp_hash */
2990 0, /* tp_call */
2991 0, /* tp_str */
2992 (getattrofunc)xmlparser_getattro, /* tp_getattro */
2993 0, /* tp_setattro */
2994 0, /* tp_as_buffer */
2995 Py_TPFLAGS_DEFAULT, /* tp_flags */
2996 0, /* tp_doc */
2997 0, /* tp_traverse */
2998 0, /* tp_clear */
2999 0, /* tp_richcompare */
3000 0, /* tp_weaklistoffset */
3001 0, /* tp_iter */
3002 0, /* tp_iternext */
3003 xmlparser_methods, /* tp_methods */
3004 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003005};
3006
3007#endif
3008
3009/* ==================================================================== */
3010/* python module interface */
3011
3012static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003013 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
3014 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
3015#if defined(USE_EXPAT)
3016 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003017#endif
3018 {NULL, NULL}
3019};
3020
Martin v. Löwis1a214512008-06-11 05:26:20 +00003021
3022static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003023 PyModuleDef_HEAD_INIT,
3024 "_elementtree",
3025 NULL,
3026 -1,
3027 _functions,
3028 NULL,
3029 NULL,
3030 NULL,
3031 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003032};
3033
Neal Norwitzf6657e62006-12-28 04:47:50 +00003034PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003035PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003036{
Eli Bendersky828efde2012-04-05 05:40:58 +03003037 PyObject *m, *g, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003038 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003040 /* Initialize object types */
3041 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003042 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003043 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003044 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003045#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003046 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003047 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003048#endif
3049
Martin v. Löwis1a214512008-06-11 05:26:20 +00003050 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003051 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003052 return NULL;
3053
3054 /* The code below requires that the module gets already added
3055 to sys.modules. */
3056 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003057 _elementtreemodule.m_name,
3058 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003059
3060 /* python glue code */
3061
3062 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003063 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003064 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003065
3066 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
3067
3068 bootstrap = (
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003069 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003070 " if tag == '*':\n"
3071 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003072 " if tag is None or node.tag == tag:\n"
3073 " yield node\n"
3074 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003075 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003077
3078 "def itertext(node):\n" /* helper */
3079 " if node.text:\n"
3080 " yield node.text\n"
3081 " for e in node:\n"
3082 " for s in e.itertext():\n"
3083 " yield s\n"
3084 " if e.tail:\n"
3085 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003086
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003087 );
3088
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003089 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3090 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003091
Eli Bendersky828efde2012-04-05 05:40:58 +03003092 if (!(temp = PyImport_ImportModule("copy")))
3093 return NULL;
3094 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3095 Py_XDECREF(temp);
3096
3097 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3098 return NULL;
3099
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003100 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3101 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003102
3103#if defined(USE_PYEXPAT_CAPI)
3104 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003105 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3106 if (expat_capi) {
3107 /* check that it's usable */
3108 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3109 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3110 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3111 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3112 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3113 expat_capi = NULL;
3114 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003115#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003116
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003117 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003118 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003119 );
3120 Py_INCREF(elementtree_parseerror_obj);
3121 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3122
Eli Bendersky092af1f2012-03-04 07:14:03 +02003123 Py_INCREF((PyObject *)&Element_Type);
3124 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3125
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003126 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003127}