blob: 5425269c1ad643e882da5f26d5d8f922bf5bb6cc [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Leave defined to include the expat-based XMLParser type */
#define USE_EXPAT

/* Define to do all expat calls via pyexpat's embedded expat library */
/* #define USE_PYEXPAT_CAPI */

/* Number of children an element can hold without any extra memory
   allocation. */
#define STATIC_CHILDREN 4

/* Tuning notes for STATIC_CHILDREN:

   - For best performance, choose a value so that 80-90% of all nodes
     have no more than the given number of children.

   - Set it to zero to minimize the size of the element structure
     itself (this only helps if you have lots of leaf nodes with
     attributes).

   - pymalloc always allocates blocks in multiples of eight bytes.  For
     the current C version of ElementTree, this means that the number of
     children should be an even number, at least on 32-bit platforms. */
77
78/* -------------------------------------------------------------------- */
79
80#if 0
81static int memory = 0;
82#define ALLOC(size, comment)\
83do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
84#define RELEASE(size, comment)\
85do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
86#else
87#define ALLOC(size, comment)
88#define RELEASE(size, comment)
89#endif
90
91/* compiler tweaks */
92#if defined(_MSC_VER)
93#define LOCAL(type) static __inline type __fastcall
94#else
95#define LOCAL(type) static type
96#endif
97
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000098/* macros used to store 'join' flags in string object pointers. note
99 that all use of text and tail as object pointers must be wrapped in
100 JOIN_OBJ. see comments in the ElementObject definition for more
101 info. */
102#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
103#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
104#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
105
106/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000107static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000108static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000109static PyObject* elementtree_iter_obj;
110static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000111static PyObject* elementpath_obj;
112
113/* helpers */
114
115LOCAL(PyObject*)
116deepcopy(PyObject* object, PyObject* memo)
117{
118 /* do a deep copy of the given object */
119
120 PyObject* args;
121 PyObject* result;
122
123 if (!elementtree_deepcopy_obj) {
124 PyErr_SetString(
125 PyExc_RuntimeError,
126 "deepcopy helper not found"
127 );
128 return NULL;
129 }
130
131 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000132 if (!args)
133 return NULL;
134
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
136 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
137
138 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
139
140 Py_DECREF(args);
141
142 return result;
143}
144
145LOCAL(PyObject*)
146list_join(PyObject* list)
147{
148 /* join list elements (destroying the list in the process) */
149
150 PyObject* joiner;
151 PyObject* function;
152 PyObject* args;
153 PyObject* result;
154
155 switch (PyList_GET_SIZE(list)) {
156 case 0:
157 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000158 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159 case 1:
160 result = PyList_GET_ITEM(list, 0);
161 Py_INCREF(result);
162 Py_DECREF(list);
163 return result;
164 }
165
166 /* two or more elements: slice out a suitable separator from the
167 first member, and use that to join the entire list */
168
169 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
170 if (!joiner)
171 return NULL;
172
173 function = PyObject_GetAttrString(joiner, "join");
174 if (!function) {
175 Py_DECREF(joiner);
176 return NULL;
177 }
178
179 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000180 if (!args)
181 return NULL;
182
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183 PyTuple_SET_ITEM(args, 0, list);
184
185 result = PyObject_CallObject(function, args);
186
187 Py_DECREF(args); /* also removes list */
188 Py_DECREF(function);
189 Py_DECREF(joiner);
190
191 return result;
192}
193
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000194/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200195/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000196
197typedef struct {
198
199 /* attributes (a dictionary object), or None if no attributes */
200 PyObject* attrib;
201
202 /* child elements */
203 int length; /* actual number of items */
204 int allocated; /* allocated items */
205
206 /* this either points to _children or to a malloced buffer */
207 PyObject* *children;
208
209 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100210
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000211} ElementObjectExtra;
212
213typedef struct {
214 PyObject_HEAD
215
216 /* element tag (a string). */
217 PyObject* tag;
218
219 /* text before first child. note that this is a tagged pointer;
220 use JOIN_OBJ to get the object pointer. the join flag is used
221 to distinguish lists created by the tree builder from lists
222 assigned to the attribute by application code; the former
223 should be joined before being returned to the user, the latter
224 should be left intact. */
225 PyObject* text;
226
227 /* text after this element, in parent. note that this is a tagged
228 pointer; use JOIN_OBJ to get the object pointer. */
229 PyObject* tail;
230
231 ElementObjectExtra* extra;
232
Eli Benderskyebf37a22012-04-03 22:02:37 +0300233 PyObject *weakreflist; /* For tp_weaklistoffset */
234
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235} ElementObject;
236
Neal Norwitz227b5332006-03-22 09:28:35 +0000237static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000238
Christian Heimes90aa7642007-12-19 02:45:37 +0000239#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000240
241/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200242/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000243
244LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200245create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246{
247 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
248 if (!self->extra)
249 return -1;
250
251 if (!attrib)
252 attrib = Py_None;
253
254 Py_INCREF(attrib);
255 self->extra->attrib = attrib;
256
257 self->extra->length = 0;
258 self->extra->allocated = STATIC_CHILDREN;
259 self->extra->children = self->extra->_children;
260
261 return 0;
262}
263
264LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200265dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300267 if (!self->extra)
268 return;
269
270 /* Avoid DECREFs calling into this code again (cycles, etc.)
271 */
272 ElementObjectExtra *myextra = self->extra;
273 self->extra = NULL;
274
275 Py_DECREF(myextra->attrib);
276
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000277 int i;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300278 for (i = 0; i < myextra->length; i++)
279 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280
Eli Benderskyebf37a22012-04-03 22:02:37 +0300281 if (myextra->children != myextra->_children)
282 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000283
Eli Benderskyebf37a22012-04-03 22:02:37 +0300284 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285}
286
Eli Bendersky092af1f2012-03-04 07:14:03 +0200287/* Convenience internal function to create new Element objects with the given
288 * tag and attributes.
289*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000290LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200291create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000292{
293 ElementObject* self;
294
Eli Bendersky0192ba32012-03-30 16:38:33 +0300295 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000296 if (self == NULL)
297 return NULL;
298
299 /* use None for empty dictionaries */
300 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
301 attrib = Py_None;
302
303 self->extra = NULL;
304
305 if (attrib != Py_None) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200306 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000307 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000308 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000309 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000310 }
311
312 Py_INCREF(tag);
313 self->tag = tag;
314
315 Py_INCREF(Py_None);
316 self->text = Py_None;
317
318 Py_INCREF(Py_None);
319 self->tail = Py_None;
320
Eli Benderskyebf37a22012-04-03 22:02:37 +0300321 self->weakreflist = NULL;
322
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000323 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300324 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000325 return (PyObject*) self;
326}
327
Eli Bendersky092af1f2012-03-04 07:14:03 +0200328static PyObject *
329element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
330{
331 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
332 if (e != NULL) {
333 Py_INCREF(Py_None);
334 e->tag = Py_None;
335
336 Py_INCREF(Py_None);
337 e->text = Py_None;
338
339 Py_INCREF(Py_None);
340 e->tail = Py_None;
341
342 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300343 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200344 }
345 return (PyObject *)e;
346}
347
348static int
349element_init(PyObject *self, PyObject *args, PyObject *kwds)
350{
351 PyObject *tag;
352 PyObject *tmp;
353 PyObject *attrib = NULL;
354 ElementObject *self_elem;
355
356 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
357 return -1;
358
359 if (attrib || kwds) {
360 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
361 if (!attrib)
362 return -1;
363 if (kwds)
364 PyDict_Update(attrib, kwds);
365 } else {
366 Py_INCREF(Py_None);
367 attrib = Py_None;
368 }
369
370 self_elem = (ElementObject *)self;
371
372 /* Use None for empty dictionaries */
373 if (PyDict_CheckExact(attrib) && PyDict_Size(attrib) == 0) {
374 Py_INCREF(Py_None);
375 attrib = Py_None;
376 }
377
378 if (attrib != Py_None) {
379 if (create_extra(self_elem, attrib) < 0) {
380 PyObject_Del(self_elem);
381 return -1;
382 }
383 }
384
385 /* If create_extra needed attrib, it took a reference to it, so we can
386 * release ours anyway.
387 */
388 Py_DECREF(attrib);
389
390 /* Replace the objects already pointed to by tag, text and tail. */
391 tmp = self_elem->tag;
392 self_elem->tag = tag;
393 Py_INCREF(tag);
394 Py_DECREF(tmp);
395
396 tmp = self_elem->text;
397 self_elem->text = Py_None;
398 Py_INCREF(Py_None);
399 Py_DECREF(JOIN_OBJ(tmp));
400
401 tmp = self_elem->tail;
402 self_elem->tail = Py_None;
403 Py_INCREF(Py_None);
404 Py_DECREF(JOIN_OBJ(tmp));
405
406 return 0;
407}
408
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000409LOCAL(int)
410element_resize(ElementObject* self, int extra)
411{
412 int size;
413 PyObject* *children;
414
415 /* make sure self->children can hold the given number of extra
416 elements. set an exception and return -1 if allocation failed */
417
418 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200419 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000420
421 size = self->extra->length + extra;
422
423 if (size > self->extra->allocated) {
424 /* use Python 2.4's list growth strategy */
425 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000426 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100427 * which needs at least 4 bytes.
428 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000429 * be safe.
430 */
431 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000432 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000433 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100434 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000435 * false alarm always assume at least one child to be safe.
436 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000437 children = PyObject_Realloc(self->extra->children,
438 size * sizeof(PyObject*));
439 if (!children)
440 goto nomemory;
441 } else {
442 children = PyObject_Malloc(size * sizeof(PyObject*));
443 if (!children)
444 goto nomemory;
445 /* copy existing children from static area to malloc buffer */
446 memcpy(children, self->extra->children,
447 self->extra->length * sizeof(PyObject*));
448 }
449 self->extra->children = children;
450 self->extra->allocated = size;
451 }
452
453 return 0;
454
455 nomemory:
456 PyErr_NoMemory();
457 return -1;
458}
459
460LOCAL(int)
461element_add_subelement(ElementObject* self, PyObject* element)
462{
463 /* add a child element to a parent */
464
465 if (element_resize(self, 1) < 0)
466 return -1;
467
468 Py_INCREF(element);
469 self->extra->children[self->extra->length] = element;
470
471 self->extra->length++;
472
473 return 0;
474}
475
476LOCAL(PyObject*)
477element_get_attrib(ElementObject* self)
478{
479 /* return borrowed reference to attrib dictionary */
480 /* note: this function assumes that the extra section exists */
481
482 PyObject* res = self->extra->attrib;
483
484 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000485 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000486 /* create missing dictionary */
487 res = PyDict_New();
488 if (!res)
489 return NULL;
490 self->extra->attrib = res;
491 }
492
493 return res;
494}
495
496LOCAL(PyObject*)
497element_get_text(ElementObject* self)
498{
499 /* return borrowed reference to text attribute */
500
501 PyObject* res = self->text;
502
503 if (JOIN_GET(res)) {
504 res = JOIN_OBJ(res);
505 if (PyList_CheckExact(res)) {
506 res = list_join(res);
507 if (!res)
508 return NULL;
509 self->text = res;
510 }
511 }
512
513 return res;
514}
515
516LOCAL(PyObject*)
517element_get_tail(ElementObject* self)
518{
519 /* return borrowed reference to text attribute */
520
521 PyObject* res = self->tail;
522
523 if (JOIN_GET(res)) {
524 res = JOIN_OBJ(res);
525 if (PyList_CheckExact(res)) {
526 res = list_join(res);
527 if (!res)
528 return NULL;
529 self->tail = res;
530 }
531 }
532
533 return res;
534}
535
536static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000537subelement(PyObject* self, PyObject* args, PyObject* kw)
538{
539 PyObject* elem;
540
541 ElementObject* parent;
542 PyObject* tag;
543 PyObject* attrib = NULL;
544 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
545 &Element_Type, &parent, &tag,
546 &PyDict_Type, &attrib))
547 return NULL;
548
549 if (attrib || kw) {
550 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
551 if (!attrib)
552 return NULL;
553 if (kw)
554 PyDict_Update(attrib, kw);
555 } else {
556 Py_INCREF(Py_None);
557 attrib = Py_None;
558 }
559
Eli Bendersky092af1f2012-03-04 07:14:03 +0200560 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561
562 Py_DECREF(attrib);
563
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000564 if (element_add_subelement(parent, elem) < 0) {
565 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000566 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000567 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000568
569 return elem;
570}
571
Eli Bendersky0192ba32012-03-30 16:38:33 +0300572static int
573element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
574{
575 Py_VISIT(self->tag);
576 Py_VISIT(JOIN_OBJ(self->text));
577 Py_VISIT(JOIN_OBJ(self->tail));
578
579 if (self->extra) {
580 int i;
581 Py_VISIT(self->extra->attrib);
582
583 for (i = 0; i < self->extra->length; ++i)
584 Py_VISIT(self->extra->children[i]);
585 }
586 return 0;
587}
588
589static int
590element_gc_clear(ElementObject *self)
591{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300592 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300593
594 /* The following is like Py_CLEAR for self->text and self->tail, but
595 * written explicitily because the real pointers hide behind access
596 * macros.
597 */
598 if (self->text) {
599 PyObject *tmp = JOIN_OBJ(self->text);
600 self->text = NULL;
601 Py_DECREF(tmp);
602 }
603
604 if (self->tail) {
605 PyObject *tmp = JOIN_OBJ(self->tail);
606 self->tail = NULL;
607 Py_DECREF(tmp);
608 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300609
610 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300611 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300612 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300613 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300614 return 0;
615}
616
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000617static void
618element_dealloc(ElementObject* self)
619{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300621
622 if (self->weakreflist != NULL)
623 PyObject_ClearWeakRefs((PyObject *) self);
624
Eli Bendersky0192ba32012-03-30 16:38:33 +0300625 /* element_gc_clear clears all references and deallocates extra
626 */
627 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000628
629 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200630 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000631}
632
633/* -------------------------------------------------------------------- */
634/* methods (in alphabetical order) */
635
636static PyObject*
637element_append(ElementObject* self, PyObject* args)
638{
639 PyObject* element;
640 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
641 return NULL;
642
643 if (element_add_subelement(self, element) < 0)
644 return NULL;
645
646 Py_RETURN_NONE;
647}
648
649static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300650element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000651{
652 if (!PyArg_ParseTuple(args, ":clear"))
653 return NULL;
654
Eli Benderskyebf37a22012-04-03 22:02:37 +0300655 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000656
657 Py_INCREF(Py_None);
658 Py_DECREF(JOIN_OBJ(self->text));
659 self->text = Py_None;
660
661 Py_INCREF(Py_None);
662 Py_DECREF(JOIN_OBJ(self->tail));
663 self->tail = Py_None;
664
665 Py_RETURN_NONE;
666}
667
668static PyObject*
669element_copy(ElementObject* self, PyObject* args)
670{
671 int i;
672 ElementObject* element;
673
674 if (!PyArg_ParseTuple(args, ":__copy__"))
675 return NULL;
676
Eli Bendersky092af1f2012-03-04 07:14:03 +0200677 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000678 self->tag, (self->extra) ? self->extra->attrib : Py_None
679 );
680 if (!element)
681 return NULL;
682
683 Py_DECREF(JOIN_OBJ(element->text));
684 element->text = self->text;
685 Py_INCREF(JOIN_OBJ(element->text));
686
687 Py_DECREF(JOIN_OBJ(element->tail));
688 element->tail = self->tail;
689 Py_INCREF(JOIN_OBJ(element->tail));
690
691 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100692
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000693 if (element_resize(element, self->extra->length) < 0) {
694 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000695 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000696 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697
698 for (i = 0; i < self->extra->length; i++) {
699 Py_INCREF(self->extra->children[i]);
700 element->extra->children[i] = self->extra->children[i];
701 }
702
703 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100704
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000705 }
706
707 return (PyObject*) element;
708}
709
710static PyObject*
711element_deepcopy(ElementObject* self, PyObject* args)
712{
713 int i;
714 ElementObject* element;
715 PyObject* tag;
716 PyObject* attrib;
717 PyObject* text;
718 PyObject* tail;
719 PyObject* id;
720
721 PyObject* memo;
722 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
723 return NULL;
724
725 tag = deepcopy(self->tag, memo);
726 if (!tag)
727 return NULL;
728
729 if (self->extra) {
730 attrib = deepcopy(self->extra->attrib, memo);
731 if (!attrib) {
732 Py_DECREF(tag);
733 return NULL;
734 }
735 } else {
736 Py_INCREF(Py_None);
737 attrib = Py_None;
738 }
739
Eli Bendersky092af1f2012-03-04 07:14:03 +0200740 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741
742 Py_DECREF(tag);
743 Py_DECREF(attrib);
744
745 if (!element)
746 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100747
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748 text = deepcopy(JOIN_OBJ(self->text), memo);
749 if (!text)
750 goto error;
751 Py_DECREF(element->text);
752 element->text = JOIN_SET(text, JOIN_GET(self->text));
753
754 tail = deepcopy(JOIN_OBJ(self->tail), memo);
755 if (!tail)
756 goto error;
757 Py_DECREF(element->tail);
758 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
759
760 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100761
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762 if (element_resize(element, self->extra->length) < 0)
763 goto error;
764
765 for (i = 0; i < self->extra->length; i++) {
766 PyObject* child = deepcopy(self->extra->children[i], memo);
767 if (!child) {
768 element->extra->length = i;
769 goto error;
770 }
771 element->extra->children[i] = child;
772 }
773
774 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100775
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000776 }
777
778 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000779 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000780 if (!id)
781 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000782
783 i = PyDict_SetItem(memo, id, (PyObject*) element);
784
785 Py_DECREF(id);
786
787 if (i < 0)
788 goto error;
789
790 return (PyObject*) element;
791
792 error:
793 Py_DECREF(element);
794 return NULL;
795}
796
797LOCAL(int)
798checkpath(PyObject* tag)
799{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000800 Py_ssize_t i;
801 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000802
803 /* check if a tag contains an xpath character */
804
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000805#define PATHCHAR(ch) \
806 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000808 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200809 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
810 void *data = PyUnicode_DATA(tag);
811 unsigned int kind = PyUnicode_KIND(tag);
812 for (i = 0; i < len; i++) {
813 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
814 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000815 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200816 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000817 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200818 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000819 return 1;
820 }
821 return 0;
822 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000823 if (PyBytes_Check(tag)) {
824 char *p = PyBytes_AS_STRING(tag);
825 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826 if (p[i] == '{')
827 check = 0;
828 else if (p[i] == '}')
829 check = 1;
830 else if (check && PATHCHAR(p[i]))
831 return 1;
832 }
833 return 0;
834 }
835
836 return 1; /* unknown type; might be path expression */
837}
838
839static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000840element_extend(ElementObject* self, PyObject* args)
841{
842 PyObject* seq;
843 Py_ssize_t i, seqlen = 0;
844
845 PyObject* seq_in;
846 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
847 return NULL;
848
849 seq = PySequence_Fast(seq_in, "");
850 if (!seq) {
851 PyErr_Format(
852 PyExc_TypeError,
853 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
854 );
855 return NULL;
856 }
857
858 seqlen = PySequence_Size(seq);
859 for (i = 0; i < seqlen; i++) {
860 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +0200861 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
862 Py_DECREF(seq);
863 PyErr_Format(
864 PyExc_TypeError,
865 "expected an Element, not \"%.200s\"",
866 Py_TYPE(element)->tp_name);
867 return NULL;
868 }
869
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000870 if (element_add_subelement(self, element) < 0) {
871 Py_DECREF(seq);
872 return NULL;
873 }
874 }
875
876 Py_DECREF(seq);
877
878 Py_RETURN_NONE;
879}
880
881static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000882element_find(ElementObject* self, PyObject* args)
883{
884 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000885 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000886 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200887
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000888 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000889 return NULL;
890
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200891 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200892 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200893 return _PyObject_CallMethodId(
894 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000895 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200896 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000897
898 if (!self->extra)
899 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100900
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000901 for (i = 0; i < self->extra->length; i++) {
902 PyObject* item = self->extra->children[i];
903 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000904 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000905 Py_INCREF(item);
906 return item;
907 }
908 }
909
910 Py_RETURN_NONE;
911}
912
913static PyObject*
914element_findtext(ElementObject* self, PyObject* args)
915{
916 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000917 PyObject* tag;
918 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000919 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200920 _Py_IDENTIFIER(findtext);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200921
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000922 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000923 return NULL;
924
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000925 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200926 return _PyObject_CallMethodId(
927 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000928 );
929
930 if (!self->extra) {
931 Py_INCREF(default_value);
932 return default_value;
933 }
934
935 for (i = 0; i < self->extra->length; i++) {
936 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000937 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
938
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000939 PyObject* text = element_get_text(item);
940 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000941 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000942 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000943 return text;
944 }
945 }
946
947 Py_INCREF(default_value);
948 return default_value;
949}
950
951static PyObject*
952element_findall(ElementObject* self, PyObject* args)
953{
954 int i;
955 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000956 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000957 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200958
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000959 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000960 return NULL;
961
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200962 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200963 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200964 return _PyObject_CallMethodId(
965 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000966 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200967 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000968
969 out = PyList_New(0);
970 if (!out)
971 return NULL;
972
973 if (!self->extra)
974 return out;
975
976 for (i = 0; i < self->extra->length; i++) {
977 PyObject* item = self->extra->children[i];
978 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000979 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000980 if (PyList_Append(out, item) < 0) {
981 Py_DECREF(out);
982 return NULL;
983 }
984 }
985 }
986
987 return out;
988}
989
990static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000991element_iterfind(ElementObject* self, PyObject* args)
992{
993 PyObject* tag;
994 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200995 _Py_IDENTIFIER(iterfind);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200996
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000997 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
998 return NULL;
999
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001000 return _PyObject_CallMethodId(
1001 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001002 );
1003}
1004
1005static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001006element_get(ElementObject* self, PyObject* args)
1007{
1008 PyObject* value;
1009
1010 PyObject* key;
1011 PyObject* default_value = Py_None;
1012 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
1013 return NULL;
1014
1015 if (!self->extra || self->extra->attrib == Py_None)
1016 value = default_value;
1017 else {
1018 value = PyDict_GetItem(self->extra->attrib, key);
1019 if (!value)
1020 value = default_value;
1021 }
1022
1023 Py_INCREF(value);
1024 return value;
1025}
1026
1027static PyObject*
1028element_getchildren(ElementObject* self, PyObject* args)
1029{
1030 int i;
1031 PyObject* list;
1032
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001033 /* FIXME: report as deprecated? */
1034
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001035 if (!PyArg_ParseTuple(args, ":getchildren"))
1036 return NULL;
1037
1038 if (!self->extra)
1039 return PyList_New(0);
1040
1041 list = PyList_New(self->extra->length);
1042 if (!list)
1043 return NULL;
1044
1045 for (i = 0; i < self->extra->length; i++) {
1046 PyObject* item = self->extra->children[i];
1047 Py_INCREF(item);
1048 PyList_SET_ITEM(list, i, item);
1049 }
1050
1051 return list;
1052}
1053
1054static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001055element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001056{
1057 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001058
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001059 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001060 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001061 return NULL;
1062
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001063 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001064 PyErr_SetString(
1065 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001066 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001067 );
1068 return NULL;
1069 }
1070
1071 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001072 if (!args)
1073 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001074
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001075 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1076 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1077
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001078 result = PyObject_CallObject(elementtree_iter_obj, args);
1079
1080 Py_DECREF(args);
1081
1082 return result;
1083}
1084
1085
1086static PyObject*
1087element_itertext(ElementObject* self, PyObject* args)
1088{
1089 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001090
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001091 if (!PyArg_ParseTuple(args, ":itertext"))
1092 return NULL;
1093
1094 if (!elementtree_itertext_obj) {
1095 PyErr_SetString(
1096 PyExc_RuntimeError,
1097 "itertext helper not found"
1098 );
1099 return NULL;
1100 }
1101
1102 args = PyTuple_New(1);
1103 if (!args)
1104 return NULL;
1105
1106 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1107
1108 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001109
1110 Py_DECREF(args);
1111
1112 return result;
1113}
1114
1115static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001116element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001117{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001118 ElementObject* self = (ElementObject*) self_;
1119
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001120 if (!self->extra || index < 0 || index >= self->extra->length) {
1121 PyErr_SetString(
1122 PyExc_IndexError,
1123 "child index out of range"
1124 );
1125 return NULL;
1126 }
1127
1128 Py_INCREF(self->extra->children[index]);
1129 return self->extra->children[index];
1130}
1131
1132static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001133element_insert(ElementObject* self, PyObject* args)
1134{
1135 int i;
1136
1137 int index;
1138 PyObject* element;
1139 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1140 &Element_Type, &element))
1141 return NULL;
1142
1143 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001144 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001145
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001146 if (index < 0) {
1147 index += self->extra->length;
1148 if (index < 0)
1149 index = 0;
1150 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001151 if (index > self->extra->length)
1152 index = self->extra->length;
1153
1154 if (element_resize(self, 1) < 0)
1155 return NULL;
1156
1157 for (i = self->extra->length; i > index; i--)
1158 self->extra->children[i] = self->extra->children[i-1];
1159
1160 Py_INCREF(element);
1161 self->extra->children[index] = element;
1162
1163 self->extra->length++;
1164
1165 Py_RETURN_NONE;
1166}
1167
1168static PyObject*
1169element_items(ElementObject* self, PyObject* args)
1170{
1171 if (!PyArg_ParseTuple(args, ":items"))
1172 return NULL;
1173
1174 if (!self->extra || self->extra->attrib == Py_None)
1175 return PyList_New(0);
1176
1177 return PyDict_Items(self->extra->attrib);
1178}
1179
1180static PyObject*
1181element_keys(ElementObject* self, PyObject* args)
1182{
1183 if (!PyArg_ParseTuple(args, ":keys"))
1184 return NULL;
1185
1186 if (!self->extra || self->extra->attrib == Py_None)
1187 return PyList_New(0);
1188
1189 return PyDict_Keys(self->extra->attrib);
1190}
1191
Martin v. Löwis18e16552006-02-15 17:27:45 +00001192static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001193element_length(ElementObject* self)
1194{
1195 if (!self->extra)
1196 return 0;
1197
1198 return self->extra->length;
1199}
1200
1201static PyObject*
1202element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1203{
1204 PyObject* elem;
1205
1206 PyObject* tag;
1207 PyObject* attrib;
1208 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1209 return NULL;
1210
1211 attrib = PyDict_Copy(attrib);
1212 if (!attrib)
1213 return NULL;
1214
Eli Bendersky092af1f2012-03-04 07:14:03 +02001215 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001216
1217 Py_DECREF(attrib);
1218
1219 return elem;
1220}
1221
1222static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001223element_remove(ElementObject* self, PyObject* args)
1224{
1225 int i;
1226
1227 PyObject* element;
1228 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1229 return NULL;
1230
1231 if (!self->extra) {
1232 /* element has no children, so raise exception */
1233 PyErr_SetString(
1234 PyExc_ValueError,
1235 "list.remove(x): x not in list"
1236 );
1237 return NULL;
1238 }
1239
1240 for (i = 0; i < self->extra->length; i++) {
1241 if (self->extra->children[i] == element)
1242 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001243 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244 break;
1245 }
1246
1247 if (i == self->extra->length) {
1248 /* element is not in children, so raise exception */
1249 PyErr_SetString(
1250 PyExc_ValueError,
1251 "list.remove(x): x not in list"
1252 );
1253 return NULL;
1254 }
1255
1256 Py_DECREF(self->extra->children[i]);
1257
1258 self->extra->length--;
1259
1260 for (; i < self->extra->length; i++)
1261 self->extra->children[i] = self->extra->children[i+1];
1262
1263 Py_RETURN_NONE;
1264}
1265
1266static PyObject*
1267element_repr(ElementObject* self)
1268{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001269 if (self->tag)
1270 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1271 else
1272 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001273}
1274
1275static PyObject*
1276element_set(ElementObject* self, PyObject* args)
1277{
1278 PyObject* attrib;
1279
1280 PyObject* key;
1281 PyObject* value;
1282 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1283 return NULL;
1284
1285 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001286 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001287
1288 attrib = element_get_attrib(self);
1289 if (!attrib)
1290 return NULL;
1291
1292 if (PyDict_SetItem(attrib, key, value) < 0)
1293 return NULL;
1294
1295 Py_RETURN_NONE;
1296}
1297
1298static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001299element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001300{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001301 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001302 int i;
1303 PyObject* old;
1304
1305 if (!self->extra || index < 0 || index >= self->extra->length) {
1306 PyErr_SetString(
1307 PyExc_IndexError,
1308 "child assignment index out of range");
1309 return -1;
1310 }
1311
1312 old = self->extra->children[index];
1313
1314 if (item) {
1315 Py_INCREF(item);
1316 self->extra->children[index] = item;
1317 } else {
1318 self->extra->length--;
1319 for (i = index; i < self->extra->length; i++)
1320 self->extra->children[i] = self->extra->children[i+1];
1321 }
1322
1323 Py_DECREF(old);
1324
1325 return 0;
1326}
1327
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001328static PyObject*
1329element_subscr(PyObject* self_, PyObject* item)
1330{
1331 ElementObject* self = (ElementObject*) self_;
1332
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001333 if (PyIndex_Check(item)) {
1334 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001335
1336 if (i == -1 && PyErr_Occurred()) {
1337 return NULL;
1338 }
1339 if (i < 0 && self->extra)
1340 i += self->extra->length;
1341 return element_getitem(self_, i);
1342 }
1343 else if (PySlice_Check(item)) {
1344 Py_ssize_t start, stop, step, slicelen, cur, i;
1345 PyObject* list;
1346
1347 if (!self->extra)
1348 return PyList_New(0);
1349
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001350 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001351 self->extra->length,
1352 &start, &stop, &step, &slicelen) < 0) {
1353 return NULL;
1354 }
1355
1356 if (slicelen <= 0)
1357 return PyList_New(0);
1358 else {
1359 list = PyList_New(slicelen);
1360 if (!list)
1361 return NULL;
1362
1363 for (cur = start, i = 0; i < slicelen;
1364 cur += step, i++) {
1365 PyObject* item = self->extra->children[cur];
1366 Py_INCREF(item);
1367 PyList_SET_ITEM(list, i, item);
1368 }
1369
1370 return list;
1371 }
1372 }
1373 else {
1374 PyErr_SetString(PyExc_TypeError,
1375 "element indices must be integers");
1376 return NULL;
1377 }
1378}
1379
1380static int
1381element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1382{
1383 ElementObject* self = (ElementObject*) self_;
1384
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001385 if (PyIndex_Check(item)) {
1386 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001387
1388 if (i == -1 && PyErr_Occurred()) {
1389 return -1;
1390 }
1391 if (i < 0 && self->extra)
1392 i += self->extra->length;
1393 return element_setitem(self_, i, value);
1394 }
1395 else if (PySlice_Check(item)) {
1396 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1397
1398 PyObject* recycle = NULL;
1399 PyObject* seq = NULL;
1400
1401 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001402 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001403
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001404 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001405 self->extra->length,
1406 &start, &stop, &step, &slicelen) < 0) {
1407 return -1;
1408 }
1409
Eli Bendersky865756a2012-03-09 13:38:15 +02001410 if (value == NULL) {
1411 /* Delete slice */
1412 size_t cur;
1413 Py_ssize_t i;
1414
1415 if (slicelen <= 0)
1416 return 0;
1417
1418 /* Since we're deleting, the direction of the range doesn't matter,
1419 * so for simplicity make it always ascending.
1420 */
1421 if (step < 0) {
1422 stop = start + 1;
1423 start = stop + step * (slicelen - 1) - 1;
1424 step = -step;
1425 }
1426
1427 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1428
1429 /* recycle is a list that will contain all the children
1430 * scheduled for removal.
1431 */
1432 if (!(recycle = PyList_New(slicelen))) {
1433 PyErr_NoMemory();
1434 return -1;
1435 }
1436
1437 /* This loop walks over all the children that have to be deleted,
1438 * with cur pointing at them. num_moved is the amount of children
1439 * until the next deleted child that have to be "shifted down" to
1440 * occupy the deleted's places.
1441 * Note that in the ith iteration, shifting is done i+i places down
1442 * because i children were already removed.
1443 */
1444 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1445 /* Compute how many children have to be moved, clipping at the
1446 * list end.
1447 */
1448 Py_ssize_t num_moved = step - 1;
1449 if (cur + step >= (size_t)self->extra->length) {
1450 num_moved = self->extra->length - cur - 1;
1451 }
1452
1453 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1454
1455 memmove(
1456 self->extra->children + cur - i,
1457 self->extra->children + cur + 1,
1458 num_moved * sizeof(PyObject *));
1459 }
1460
1461 /* Leftover "tail" after the last removed child */
1462 cur = start + (size_t)slicelen * step;
1463 if (cur < (size_t)self->extra->length) {
1464 memmove(
1465 self->extra->children + cur - slicelen,
1466 self->extra->children + cur,
1467 (self->extra->length - cur) * sizeof(PyObject *));
1468 }
1469
1470 self->extra->length -= slicelen;
1471
1472 /* Discard the recycle list with all the deleted sub-elements */
1473 Py_XDECREF(recycle);
1474 return 0;
1475 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001476 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001477 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001478 seq = PySequence_Fast(value, "");
1479 if (!seq) {
1480 PyErr_Format(
1481 PyExc_TypeError,
1482 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1483 );
1484 return -1;
1485 }
1486 newlen = PySequence_Size(seq);
1487 }
1488
1489 if (step != 1 && newlen != slicelen)
1490 {
1491 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001492 "attempt to assign sequence of size %zd "
1493 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001494 newlen, slicelen
1495 );
1496 return -1;
1497 }
1498
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001499 /* Resize before creating the recycle bin, to prevent refleaks. */
1500 if (newlen > slicelen) {
1501 if (element_resize(self, newlen - slicelen) < 0) {
1502 if (seq) {
1503 Py_DECREF(seq);
1504 }
1505 return -1;
1506 }
1507 }
1508
1509 if (slicelen > 0) {
1510 /* to avoid recursive calls to this method (via decref), move
1511 old items to the recycle bin here, and get rid of them when
1512 we're done modifying the element */
1513 recycle = PyList_New(slicelen);
1514 if (!recycle) {
1515 if (seq) {
1516 Py_DECREF(seq);
1517 }
1518 return -1;
1519 }
1520 for (cur = start, i = 0; i < slicelen;
1521 cur += step, i++)
1522 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1523 }
1524
1525 if (newlen < slicelen) {
1526 /* delete slice */
1527 for (i = stop; i < self->extra->length; i++)
1528 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1529 } else if (newlen > slicelen) {
1530 /* insert slice */
1531 for (i = self->extra->length-1; i >= stop; i--)
1532 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1533 }
1534
1535 /* replace the slice */
1536 for (cur = start, i = 0; i < newlen;
1537 cur += step, i++) {
1538 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1539 Py_INCREF(element);
1540 self->extra->children[cur] = element;
1541 }
1542
1543 self->extra->length += newlen - slicelen;
1544
1545 if (seq) {
1546 Py_DECREF(seq);
1547 }
1548
1549 /* discard the recycle bin, and everything in it */
1550 Py_XDECREF(recycle);
1551
1552 return 0;
1553 }
1554 else {
1555 PyErr_SetString(PyExc_TypeError,
1556 "element indices must be integers");
1557 return -1;
1558 }
1559}
1560
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001561static PyMethodDef element_methods[] = {
1562
Eli Bendersky0192ba32012-03-30 16:38:33 +03001563 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001564
1565 {"get", (PyCFunction) element_get, METH_VARARGS},
1566 {"set", (PyCFunction) element_set, METH_VARARGS},
1567
1568 {"find", (PyCFunction) element_find, METH_VARARGS},
1569 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1570 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1571
1572 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001573 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001574 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1575 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1576
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001577 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1578 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1579 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1580
1581 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001582 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1583
1584 {"items", (PyCFunction) element_items, METH_VARARGS},
1585 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1586
1587 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1588
1589 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1590 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1591
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001592 {NULL, NULL}
1593};
1594
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001595static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001596element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001597{
1598 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001599 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001600
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001601 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001602 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001603
Alexander Belopolskye239d232010-12-08 23:31:48 +00001604 if (name == NULL)
1605 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001606
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001607 /* handle common attributes first */
1608 if (strcmp(name, "tag") == 0) {
1609 res = self->tag;
1610 Py_INCREF(res);
1611 return res;
1612 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001613 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001614 Py_INCREF(res);
1615 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001616 }
1617
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001618 /* methods */
1619 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1620 if (res)
1621 return res;
1622
1623 /* less common attributes */
1624 if (strcmp(name, "tail") == 0) {
1625 PyErr_Clear();
1626 res = element_get_tail(self);
1627 } else if (strcmp(name, "attrib") == 0) {
1628 PyErr_Clear();
1629 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001630 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001631 res = element_get_attrib(self);
1632 }
1633
1634 if (!res)
1635 return NULL;
1636
1637 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001638 return res;
1639}
1640
1641static int
1642element_setattr(ElementObject* self, const char* name, PyObject* value)
1643{
1644 if (value == NULL) {
1645 PyErr_SetString(
1646 PyExc_AttributeError,
1647 "can't delete element attributes"
1648 );
1649 return -1;
1650 }
1651
1652 if (strcmp(name, "tag") == 0) {
1653 Py_DECREF(self->tag);
1654 self->tag = value;
1655 Py_INCREF(self->tag);
1656 } else if (strcmp(name, "text") == 0) {
1657 Py_DECREF(JOIN_OBJ(self->text));
1658 self->text = value;
1659 Py_INCREF(self->text);
1660 } else if (strcmp(name, "tail") == 0) {
1661 Py_DECREF(JOIN_OBJ(self->tail));
1662 self->tail = value;
1663 Py_INCREF(self->tail);
1664 } else if (strcmp(name, "attrib") == 0) {
1665 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001666 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001667 Py_DECREF(self->extra->attrib);
1668 self->extra->attrib = value;
1669 Py_INCREF(self->extra->attrib);
1670 } else {
1671 PyErr_SetString(PyExc_AttributeError, name);
1672 return -1;
1673 }
1674
1675 return 0;
1676}
1677
1678static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001679 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001680 0, /* sq_concat */
1681 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001682 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001683 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001684 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001685 0,
1686};
1687
1688static PyMappingMethods element_as_mapping = {
1689 (lenfunc) element_length,
1690 (binaryfunc) element_subscr,
1691 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001692};
1693
Neal Norwitz227b5332006-03-22 09:28:35 +00001694static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001695 PyVarObject_HEAD_INIT(NULL, 0)
1696 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001697 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001698 (destructor)element_dealloc, /* tp_dealloc */
1699 0, /* tp_print */
1700 0, /* tp_getattr */
1701 (setattrfunc)element_setattr, /* tp_setattr */
1702 0, /* tp_reserved */
1703 (reprfunc)element_repr, /* tp_repr */
1704 0, /* tp_as_number */
1705 &element_as_sequence, /* tp_as_sequence */
1706 &element_as_mapping, /* tp_as_mapping */
1707 0, /* tp_hash */
1708 0, /* tp_call */
1709 0, /* tp_str */
1710 (getattrofunc)element_getattro, /* tp_getattro */
1711 0, /* tp_setattro */
1712 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001713 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1714 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001715 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001716 (traverseproc)element_gc_traverse, /* tp_traverse */
1717 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001718 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001719 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001720 0, /* tp_iter */
1721 0, /* tp_iternext */
1722 element_methods, /* tp_methods */
1723 0, /* tp_members */
1724 0, /* tp_getset */
1725 0, /* tp_base */
1726 0, /* tp_dict */
1727 0, /* tp_descr_get */
1728 0, /* tp_descr_set */
1729 0, /* tp_dictoffset */
1730 (initproc)element_init, /* tp_init */
1731 PyType_GenericAlloc, /* tp_alloc */
1732 element_new, /* tp_new */
1733 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001734};
1735
1736/* ==================================================================== */
1737/* the tree builder type */
1738
1739typedef struct {
1740 PyObject_HEAD
1741
1742 PyObject* root; /* root node (first created node) */
1743
1744 ElementObject* this; /* current node */
1745 ElementObject* last; /* most recently created node */
1746
1747 PyObject* data; /* data collector (string or list), or NULL */
1748
1749 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001750 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001751
1752 /* element tracing */
1753 PyObject* events; /* list of events, or NULL if not collecting */
1754 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1755 PyObject* end_event_obj;
1756 PyObject* start_ns_event_obj;
1757 PyObject* end_ns_event_obj;
1758
1759} TreeBuilderObject;
1760
Neal Norwitz227b5332006-03-22 09:28:35 +00001761static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001762
Christian Heimes90aa7642007-12-19 02:45:37 +00001763#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001764
1765/* -------------------------------------------------------------------- */
1766/* constructor and destructor */
1767
1768LOCAL(PyObject*)
1769treebuilder_new(void)
1770{
1771 TreeBuilderObject* self;
1772
1773 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1774 if (self == NULL)
1775 return NULL;
1776
1777 self->root = NULL;
1778
1779 Py_INCREF(Py_None);
1780 self->this = (ElementObject*) Py_None;
1781
1782 Py_INCREF(Py_None);
1783 self->last = (ElementObject*) Py_None;
1784
1785 self->data = NULL;
1786
1787 self->stack = PyList_New(20);
1788 self->index = 0;
1789
1790 self->events = NULL;
1791 self->start_event_obj = self->end_event_obj = NULL;
1792 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1793
1794 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1795
1796 return (PyObject*) self;
1797}
1798
1799static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001800treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001801{
1802 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1803 return NULL;
1804
1805 return treebuilder_new();
1806}
1807
1808static void
1809treebuilder_dealloc(TreeBuilderObject* self)
1810{
1811 Py_XDECREF(self->end_ns_event_obj);
1812 Py_XDECREF(self->start_ns_event_obj);
1813 Py_XDECREF(self->end_event_obj);
1814 Py_XDECREF(self->start_event_obj);
1815 Py_XDECREF(self->events);
1816 Py_DECREF(self->stack);
1817 Py_XDECREF(self->data);
1818 Py_DECREF(self->last);
1819 Py_DECREF(self->this);
1820 Py_XDECREF(self->root);
1821
1822 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1823
1824 PyObject_Del(self);
1825}
1826
1827/* -------------------------------------------------------------------- */
1828/* handlers */
1829
1830LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001831treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1832 PyObject* attrib)
1833{
1834 PyObject* node;
1835 PyObject* this;
1836
1837 if (self->data) {
1838 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001839 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001840 self->last->text = JOIN_SET(
1841 self->data, PyList_CheckExact(self->data)
1842 );
1843 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001844 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001845 self->last->tail = JOIN_SET(
1846 self->data, PyList_CheckExact(self->data)
1847 );
1848 }
1849 self->data = NULL;
1850 }
1851
Eli Bendersky092af1f2012-03-04 07:14:03 +02001852 node = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001853 if (!node)
1854 return NULL;
1855
1856 this = (PyObject*) self->this;
1857
1858 if (this != Py_None) {
1859 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001860 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001861 } else {
1862 if (self->root) {
1863 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001864 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001865 "multiple elements on top level"
1866 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001867 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001868 }
1869 Py_INCREF(node);
1870 self->root = node;
1871 }
1872
1873 if (self->index < PyList_GET_SIZE(self->stack)) {
1874 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001875 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001876 Py_INCREF(this);
1877 } else {
1878 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001879 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001880 }
1881 self->index++;
1882
1883 Py_DECREF(this);
1884 Py_INCREF(node);
1885 self->this = (ElementObject*) node;
1886
1887 Py_DECREF(self->last);
1888 Py_INCREF(node);
1889 self->last = (ElementObject*) node;
1890
1891 if (self->start_event_obj) {
1892 PyObject* res;
1893 PyObject* action = self->start_event_obj;
1894 res = PyTuple_New(2);
1895 if (res) {
1896 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1897 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1898 PyList_Append(self->events, res);
1899 Py_DECREF(res);
1900 } else
1901 PyErr_Clear(); /* FIXME: propagate error */
1902 }
1903
1904 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001905
1906 error:
1907 Py_DECREF(node);
1908 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001909}
1910
1911LOCAL(PyObject*)
1912treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1913{
1914 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001915 if (self->last == (ElementObject*) Py_None) {
1916 /* ignore calls to data before the first call to start */
1917 Py_RETURN_NONE;
1918 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001919 /* store the first item as is */
1920 Py_INCREF(data); self->data = data;
1921 } else {
1922 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001923 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1924 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001925 /* expat often generates single character data sections; handle
1926 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001927 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1928 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001929 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001930 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001931 } else if (PyList_CheckExact(self->data)) {
1932 if (PyList_Append(self->data, data) < 0)
1933 return NULL;
1934 } else {
1935 PyObject* list = PyList_New(2);
1936 if (!list)
1937 return NULL;
1938 PyList_SET_ITEM(list, 0, self->data);
1939 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1940 self->data = list;
1941 }
1942 }
1943
1944 Py_RETURN_NONE;
1945}
1946
1947LOCAL(PyObject*)
1948treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1949{
1950 PyObject* item;
1951
1952 if (self->data) {
1953 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001954 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001955 self->last->text = JOIN_SET(
1956 self->data, PyList_CheckExact(self->data)
1957 );
1958 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001959 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001960 self->last->tail = JOIN_SET(
1961 self->data, PyList_CheckExact(self->data)
1962 );
1963 }
1964 self->data = NULL;
1965 }
1966
1967 if (self->index == 0) {
1968 PyErr_SetString(
1969 PyExc_IndexError,
1970 "pop from empty stack"
1971 );
1972 return NULL;
1973 }
1974
1975 self->index--;
1976
1977 item = PyList_GET_ITEM(self->stack, self->index);
1978 Py_INCREF(item);
1979
1980 Py_DECREF(self->last);
1981
1982 self->last = (ElementObject*) self->this;
1983 self->this = (ElementObject*) item;
1984
1985 if (self->end_event_obj) {
1986 PyObject* res;
1987 PyObject* action = self->end_event_obj;
1988 PyObject* node = (PyObject*) self->last;
1989 res = PyTuple_New(2);
1990 if (res) {
1991 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1992 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1993 PyList_Append(self->events, res);
1994 Py_DECREF(res);
1995 } else
1996 PyErr_Clear(); /* FIXME: propagate error */
1997 }
1998
1999 Py_INCREF(self->last);
2000 return (PyObject*) self->last;
2001}
2002
2003LOCAL(void)
2004treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002005 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002006{
2007 PyObject* res;
2008 PyObject* action;
2009 PyObject* parcel;
2010
2011 if (!self->events)
2012 return;
2013
2014 if (start) {
2015 if (!self->start_ns_event_obj)
2016 return;
2017 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002018 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002019 if (!parcel)
2020 return;
2021 Py_INCREF(action);
2022 } else {
2023 if (!self->end_ns_event_obj)
2024 return;
2025 action = self->end_ns_event_obj;
2026 Py_INCREF(action);
2027 parcel = Py_None;
2028 Py_INCREF(parcel);
2029 }
2030
2031 res = PyTuple_New(2);
2032
2033 if (res) {
2034 PyTuple_SET_ITEM(res, 0, action);
2035 PyTuple_SET_ITEM(res, 1, parcel);
2036 PyList_Append(self->events, res);
2037 Py_DECREF(res);
2038 } else
2039 PyErr_Clear(); /* FIXME: propagate error */
2040}
2041
2042/* -------------------------------------------------------------------- */
2043/* methods (in alphabetical order) */
2044
2045static PyObject*
2046treebuilder_data(TreeBuilderObject* self, PyObject* args)
2047{
2048 PyObject* data;
2049 if (!PyArg_ParseTuple(args, "O:data", &data))
2050 return NULL;
2051
2052 return treebuilder_handle_data(self, data);
2053}
2054
2055static PyObject*
2056treebuilder_end(TreeBuilderObject* self, PyObject* args)
2057{
2058 PyObject* tag;
2059 if (!PyArg_ParseTuple(args, "O:end", &tag))
2060 return NULL;
2061
2062 return treebuilder_handle_end(self, tag);
2063}
2064
2065LOCAL(PyObject*)
2066treebuilder_done(TreeBuilderObject* self)
2067{
2068 PyObject* res;
2069
2070 /* FIXME: check stack size? */
2071
2072 if (self->root)
2073 res = self->root;
2074 else
2075 res = Py_None;
2076
2077 Py_INCREF(res);
2078 return res;
2079}
2080
2081static PyObject*
2082treebuilder_close(TreeBuilderObject* self, PyObject* args)
2083{
2084 if (!PyArg_ParseTuple(args, ":close"))
2085 return NULL;
2086
2087 return treebuilder_done(self);
2088}
2089
2090static PyObject*
2091treebuilder_start(TreeBuilderObject* self, PyObject* args)
2092{
2093 PyObject* tag;
2094 PyObject* attrib = Py_None;
2095 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2096 return NULL;
2097
2098 return treebuilder_handle_start(self, tag, attrib);
2099}
2100
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002101static PyMethodDef treebuilder_methods[] = {
2102 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2103 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2104 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002105 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2106 {NULL, NULL}
2107};
2108
Neal Norwitz227b5332006-03-22 09:28:35 +00002109static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002110 PyVarObject_HEAD_INIT(NULL, 0)
2111 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002112 /* methods */
2113 (destructor)treebuilder_dealloc, /* tp_dealloc */
2114 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002115 0, /* tp_getattr */
2116 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002117 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002118 0, /* tp_repr */
2119 0, /* tp_as_number */
2120 0, /* tp_as_sequence */
2121 0, /* tp_as_mapping */
2122 0, /* tp_hash */
2123 0, /* tp_call */
2124 0, /* tp_str */
2125 0, /* tp_getattro */
2126 0, /* tp_setattro */
2127 0, /* tp_as_buffer */
2128 Py_TPFLAGS_DEFAULT, /* tp_flags */
2129 0, /* tp_doc */
2130 0, /* tp_traverse */
2131 0, /* tp_clear */
2132 0, /* tp_richcompare */
2133 0, /* tp_weaklistoffset */
2134 0, /* tp_iter */
2135 0, /* tp_iternext */
2136 treebuilder_methods, /* tp_methods */
2137 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002138};
2139
2140/* ==================================================================== */
2141/* the expat interface */
2142
2143#if defined(USE_EXPAT)
2144
2145#include "expat.h"
2146
/* EXPAT(func) names an expat entry point: through the pyexpat dispatch
   table when USE_PYEXPAT_CAPI is defined, otherwise as the XML_-prefixed
   library function. */
#ifdef USE_PYEXPAT_CAPI
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
2154
2155typedef struct {
2156 PyObject_HEAD
2157
2158 XML_Parser parser;
2159
2160 PyObject* target;
2161 PyObject* entity;
2162
2163 PyObject* names;
2164
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002165 PyObject* handle_start;
2166 PyObject* handle_data;
2167 PyObject* handle_end;
2168
2169 PyObject* handle_comment;
2170 PyObject* handle_pi;
2171
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002172 PyObject* handle_close;
2173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002174} XMLParserObject;
2175
Neal Norwitz227b5332006-03-22 09:28:35 +00002176static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002177
2178/* helpers */
2179
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002180LOCAL(PyObject*)
2181makeuniversal(XMLParserObject* self, const char* string)
2182{
2183 /* convert a UTF-8 tag/attribute name from the expat parser
2184 to a universal name string */
2185
2186 int size = strlen(string);
2187 PyObject* key;
2188 PyObject* value;
2189
2190 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002191 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002192 if (!key)
2193 return NULL;
2194
2195 value = PyDict_GetItem(self->names, key);
2196
2197 if (value) {
2198 Py_INCREF(value);
2199 } else {
2200 /* new name. convert to universal name, and decode as
2201 necessary */
2202
2203 PyObject* tag;
2204 char* p;
2205 int i;
2206
2207 /* look for namespace separator */
2208 for (i = 0; i < size; i++)
2209 if (string[i] == '}')
2210 break;
2211 if (i != size) {
2212 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002213 tag = PyBytes_FromStringAndSize(NULL, size+1);
2214 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002215 p[0] = '{';
2216 memcpy(p+1, string, size);
2217 size++;
2218 } else {
2219 /* plain name; use key as tag */
2220 Py_INCREF(key);
2221 tag = key;
2222 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002223
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002224 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002225 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002226 value = PyUnicode_DecodeUTF8(p, size, "strict");
2227 Py_DECREF(tag);
2228 if (!value) {
2229 Py_DECREF(key);
2230 return NULL;
2231 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002232
2233 /* add to names dictionary */
2234 if (PyDict_SetItem(self->names, key, value) < 0) {
2235 Py_DECREF(key);
2236 Py_DECREF(value);
2237 return NULL;
2238 }
2239 }
2240
2241 Py_DECREF(key);
2242 return value;
2243}
2244
Eli Bendersky5b77d812012-03-16 08:20:05 +02002245/* Set the ParseError exception with the given parameters.
2246 * If message is not NULL, it's used as the error string. Otherwise, the
2247 * message string is the default for the given error_code.
2248*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002249static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002250expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002251{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002252 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002253
Victor Stinner499dfcf2011-03-21 13:26:24 +01002254 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002255 message ? message : EXPAT(ErrorString)(error_code),
2256 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002257 if (errmsg == NULL)
2258 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002259
Victor Stinner499dfcf2011-03-21 13:26:24 +01002260 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2261 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002262 if (!error)
2263 return;
2264
Eli Bendersky5b77d812012-03-16 08:20:05 +02002265 /* Add code and position attributes */
2266 code = PyLong_FromLong((long)error_code);
2267 if (!code) {
2268 Py_DECREF(error);
2269 return;
2270 }
2271 if (PyObject_SetAttrString(error, "code", code) == -1) {
2272 Py_DECREF(error);
2273 Py_DECREF(code);
2274 return;
2275 }
2276 Py_DECREF(code);
2277
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002278 position = Py_BuildValue("(ii)", line, column);
2279 if (!position) {
2280 Py_DECREF(error);
2281 return;
2282 }
2283 if (PyObject_SetAttrString(error, "position", position) == -1) {
2284 Py_DECREF(error);
2285 Py_DECREF(position);
2286 return;
2287 }
2288 Py_DECREF(position);
2289
2290 PyErr_SetObject(elementtree_parseerror_obj, error);
2291 Py_DECREF(error);
2292}
2293
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002294/* -------------------------------------------------------------------- */
2295/* handlers */
2296
2297static void
2298expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2299 int data_len)
2300{
2301 PyObject* key;
2302 PyObject* value;
2303 PyObject* res;
2304
2305 if (data_len < 2 || data_in[0] != '&')
2306 return;
2307
Neal Norwitz0269b912007-08-08 06:56:02 +00002308 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002309 if (!key)
2310 return;
2311
2312 value = PyDict_GetItem(self->entity, key);
2313
2314 if (value) {
2315 if (TreeBuilder_CheckExact(self->target))
2316 res = treebuilder_handle_data(
2317 (TreeBuilderObject*) self->target, value
2318 );
2319 else if (self->handle_data)
2320 res = PyObject_CallFunction(self->handle_data, "O", value);
2321 else
2322 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002323 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002324 } else if (!PyErr_Occurred()) {
2325 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002326 char message[128] = "undefined entity ";
2327 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002328 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002329 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002330 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002331 EXPAT(GetErrorColumnNumber)(self->parser),
2332 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002333 );
2334 }
2335
2336 Py_DECREF(key);
2337}
2338
2339static void
2340expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2341 const XML_Char **attrib_in)
2342{
2343 PyObject* res;
2344 PyObject* tag;
2345 PyObject* attrib;
2346 int ok;
2347
2348 /* tag name */
2349 tag = makeuniversal(self, tag_in);
2350 if (!tag)
2351 return; /* parser will look for errors */
2352
2353 /* attributes */
2354 if (attrib_in[0]) {
2355 attrib = PyDict_New();
2356 if (!attrib)
2357 return;
2358 while (attrib_in[0] && attrib_in[1]) {
2359 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002360 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002361 if (!key || !value) {
2362 Py_XDECREF(value);
2363 Py_XDECREF(key);
2364 Py_DECREF(attrib);
2365 return;
2366 }
2367 ok = PyDict_SetItem(attrib, key, value);
2368 Py_DECREF(value);
2369 Py_DECREF(key);
2370 if (ok < 0) {
2371 Py_DECREF(attrib);
2372 return;
2373 }
2374 attrib_in += 2;
2375 }
2376 } else {
2377 Py_INCREF(Py_None);
2378 attrib = Py_None;
2379 }
2380
2381 if (TreeBuilder_CheckExact(self->target))
2382 /* shortcut */
2383 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2384 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002385 else if (self->handle_start) {
2386 if (attrib == Py_None) {
2387 Py_DECREF(attrib);
2388 attrib = PyDict_New();
2389 if (!attrib)
2390 return;
2391 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002392 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002393 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002394 res = NULL;
2395
2396 Py_DECREF(tag);
2397 Py_DECREF(attrib);
2398
2399 Py_XDECREF(res);
2400}
2401
2402static void
2403expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2404 int data_len)
2405{
2406 PyObject* data;
2407 PyObject* res;
2408
Neal Norwitz0269b912007-08-08 06:56:02 +00002409 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002410 if (!data)
2411 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002412
2413 if (TreeBuilder_CheckExact(self->target))
2414 /* shortcut */
2415 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2416 else if (self->handle_data)
2417 res = PyObject_CallFunction(self->handle_data, "O", data);
2418 else
2419 res = NULL;
2420
2421 Py_DECREF(data);
2422
2423 Py_XDECREF(res);
2424}
2425
2426static void
2427expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2428{
2429 PyObject* tag;
2430 PyObject* res = NULL;
2431
2432 if (TreeBuilder_CheckExact(self->target))
2433 /* shortcut */
2434 /* the standard tree builder doesn't look at the end tag */
2435 res = treebuilder_handle_end(
2436 (TreeBuilderObject*) self->target, Py_None
2437 );
2438 else if (self->handle_end) {
2439 tag = makeuniversal(self, tag_in);
2440 if (tag) {
2441 res = PyObject_CallFunction(self->handle_end, "O", tag);
2442 Py_DECREF(tag);
2443 }
2444 }
2445
2446 Py_XDECREF(res);
2447}
2448
2449static void
2450expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2451 const XML_Char *uri)
2452{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002453 PyObject* sprefix = NULL;
2454 PyObject* suri = NULL;
2455
2456 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2457 if (!suri)
2458 return;
2459
2460 if (prefix)
2461 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2462 else
2463 sprefix = PyUnicode_FromString("");
2464 if (!sprefix) {
2465 Py_DECREF(suri);
2466 return;
2467 }
2468
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002469 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002470 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002471 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002472
2473 Py_DECREF(sprefix);
2474 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002475}
2476
2477static void
2478expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2479{
2480 treebuilder_handle_namespace(
2481 (TreeBuilderObject*) self->target, 0, NULL, NULL
2482 );
2483}
2484
2485static void
2486expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2487{
2488 PyObject* comment;
2489 PyObject* res;
2490
2491 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002492 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002493 if (comment) {
2494 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2495 Py_XDECREF(res);
2496 Py_DECREF(comment);
2497 }
2498 }
2499}
2500
2501static void
2502expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2503 const XML_Char* data_in)
2504{
2505 PyObject* target;
2506 PyObject* data;
2507 PyObject* res;
2508
2509 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002510 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2511 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002512 if (target && data) {
2513 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2514 Py_XDECREF(res);
2515 Py_DECREF(data);
2516 Py_DECREF(target);
2517 } else {
2518 Py_XDECREF(data);
2519 Py_XDECREF(target);
2520 }
2521 }
2522}
2523
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002524static int
2525expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2526 XML_Encoding *info)
2527{
2528 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002529 unsigned char s[256];
2530 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002531 void *data;
2532 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002533
2534 memset(info, 0, sizeof(XML_Encoding));
2535
2536 for (i = 0; i < 256; i++)
2537 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002538
Fredrik Lundhc3389992005-12-25 11:40:19 +00002539 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002540 if (!u)
2541 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002542 if (PyUnicode_READY(u))
2543 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002545 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002546 Py_DECREF(u);
2547 return XML_STATUS_ERROR;
2548 }
2549
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002550 kind = PyUnicode_KIND(u);
2551 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002552 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002553 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2554 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2555 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002556 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002557 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002558 }
2559
2560 Py_DECREF(u);
2561
2562 return XML_STATUS_OK;
2563}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002564
2565/* -------------------------------------------------------------------- */
2566/* constructor and destructor */
2567
2568static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002569xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002570{
2571 XMLParserObject* self;
2572 /* FIXME: does this need to be static? */
2573 static XML_Memory_Handling_Suite memory_handler;
2574
2575 PyObject* target = NULL;
2576 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002577 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002578 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2579 &target, &encoding))
2580 return NULL;
2581
2582#if defined(USE_PYEXPAT_CAPI)
2583 if (!expat_capi) {
2584 PyErr_SetString(
2585 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2586 );
2587 return NULL;
2588 }
2589#endif
2590
2591 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2592 if (self == NULL)
2593 return NULL;
2594
2595 self->entity = PyDict_New();
2596 if (!self->entity) {
2597 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002598 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002599 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002600
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002601 self->names = PyDict_New();
2602 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002603 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002604 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002605 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002606 }
2607
2608 memory_handler.malloc_fcn = PyObject_Malloc;
2609 memory_handler.realloc_fcn = PyObject_Realloc;
2610 memory_handler.free_fcn = PyObject_Free;
2611
2612 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2613 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002614 PyObject_Del(self->names);
2615 PyObject_Del(self->entity);
2616 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002617 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002618 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002619 }
2620
2621 /* setup target handlers */
2622 if (!target) {
2623 target = treebuilder_new();
2624 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002625 EXPAT(ParserFree)(self->parser);
2626 PyObject_Del(self->names);
2627 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002628 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002629 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002630 }
2631 } else
2632 Py_INCREF(target);
2633 self->target = target;
2634
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002635 self->handle_start = PyObject_GetAttrString(target, "start");
2636 self->handle_data = PyObject_GetAttrString(target, "data");
2637 self->handle_end = PyObject_GetAttrString(target, "end");
2638 self->handle_comment = PyObject_GetAttrString(target, "comment");
2639 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002640 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002641
2642 PyErr_Clear();
2643
2644 /* configure parser */
2645 EXPAT(SetUserData)(self->parser, self);
2646 EXPAT(SetElementHandler)(
2647 self->parser,
2648 (XML_StartElementHandler) expat_start_handler,
2649 (XML_EndElementHandler) expat_end_handler
2650 );
2651 EXPAT(SetDefaultHandlerExpand)(
2652 self->parser,
2653 (XML_DefaultHandler) expat_default_handler
2654 );
2655 EXPAT(SetCharacterDataHandler)(
2656 self->parser,
2657 (XML_CharacterDataHandler) expat_data_handler
2658 );
2659 if (self->handle_comment)
2660 EXPAT(SetCommentHandler)(
2661 self->parser,
2662 (XML_CommentHandler) expat_comment_handler
2663 );
2664 if (self->handle_pi)
2665 EXPAT(SetProcessingInstructionHandler)(
2666 self->parser,
2667 (XML_ProcessingInstructionHandler) expat_pi_handler
2668 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002669 EXPAT(SetUnknownEncodingHandler)(
2670 self->parser,
2671 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2672 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002673
2674 ALLOC(sizeof(XMLParserObject), "create expatparser");
2675
2676 return (PyObject*) self;
2677}
2678
2679static void
2680xmlparser_dealloc(XMLParserObject* self)
2681{
2682 EXPAT(ParserFree)(self->parser);
2683
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002684 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002685 Py_XDECREF(self->handle_pi);
2686 Py_XDECREF(self->handle_comment);
2687 Py_XDECREF(self->handle_end);
2688 Py_XDECREF(self->handle_data);
2689 Py_XDECREF(self->handle_start);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002690
2691 Py_DECREF(self->target);
2692 Py_DECREF(self->entity);
2693 Py_DECREF(self->names);
2694
2695 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2696
2697 PyObject_Del(self);
2698}
2699
2700/* -------------------------------------------------------------------- */
2701/* methods (in alphabetical order) */
2702
2703LOCAL(PyObject*)
2704expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2705{
2706 int ok;
2707
2708 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2709
2710 if (PyErr_Occurred())
2711 return NULL;
2712
2713 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002714 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002715 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002717 EXPAT(GetErrorColumnNumber)(self->parser),
2718 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002719 );
2720 return NULL;
2721 }
2722
2723 Py_RETURN_NONE;
2724}
2725
2726static PyObject*
2727xmlparser_close(XMLParserObject* self, PyObject* args)
2728{
2729 /* end feeding data to parser */
2730
2731 PyObject* res;
2732 if (!PyArg_ParseTuple(args, ":close"))
2733 return NULL;
2734
2735 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002736 if (!res)
2737 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002739 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002740 Py_DECREF(res);
2741 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002742 } if (self->handle_close) {
2743 Py_DECREF(res);
2744 return PyObject_CallFunction(self->handle_close, "");
2745 } else
2746 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002747}
2748
2749static PyObject*
2750xmlparser_feed(XMLParserObject* self, PyObject* args)
2751{
2752 /* feed data to parser */
2753
2754 char* data;
2755 int data_len;
2756 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2757 return NULL;
2758
2759 return expat_parse(self, data, data_len, 0);
2760}
2761
2762static PyObject*
2763xmlparser_parse(XMLParserObject* self, PyObject* args)
2764{
2765 /* (internal) parse until end of input stream */
2766
2767 PyObject* reader;
2768 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02002769 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002770 PyObject* res;
2771
2772 PyObject* fileobj;
2773 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2774 return NULL;
2775
2776 reader = PyObject_GetAttrString(fileobj, "read");
2777 if (!reader)
2778 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002779
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002780 /* read from open file object */
2781 for (;;) {
2782
2783 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2784
2785 if (!buffer) {
2786 /* read failed (e.g. due to KeyboardInterrupt) */
2787 Py_DECREF(reader);
2788 return NULL;
2789 }
2790
Eli Benderskyf996e772012-03-16 05:53:30 +02002791 if (PyUnicode_CheckExact(buffer)) {
2792 /* A unicode object is encoded into bytes using UTF-8 */
2793 if (PyUnicode_GET_SIZE(buffer) == 0) {
2794 Py_DECREF(buffer);
2795 break;
2796 }
2797 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
2798 if (!temp) {
2799 /* Propagate exception from PyUnicode_AsEncodedString */
2800 Py_DECREF(buffer);
2801 Py_DECREF(reader);
2802 return NULL;
2803 }
2804
2805 /* Here we no longer need the original buffer since it contains
2806 * unicode. Make it point to the encoded bytes object.
2807 */
2808 Py_DECREF(buffer);
2809 buffer = temp;
2810 }
2811 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002812 Py_DECREF(buffer);
2813 break;
2814 }
2815
2816 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002817 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002818 );
2819
2820 Py_DECREF(buffer);
2821
2822 if (!res) {
2823 Py_DECREF(reader);
2824 return NULL;
2825 }
2826 Py_DECREF(res);
2827
2828 }
2829
2830 Py_DECREF(reader);
2831
2832 res = expat_parse(self, "", 0, 1);
2833
2834 if (res && TreeBuilder_CheckExact(self->target)) {
2835 Py_DECREF(res);
2836 return treebuilder_done((TreeBuilderObject*) self->target);
2837 }
2838
2839 return res;
2840}
2841
2842static PyObject*
2843xmlparser_setevents(XMLParserObject* self, PyObject* args)
2844{
2845 /* activate element event reporting */
2846
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002847 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002848 TreeBuilderObject* target;
2849
2850 PyObject* events; /* event collector */
2851 PyObject* event_set = Py_None;
2852 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2853 &event_set))
2854 return NULL;
2855
2856 if (!TreeBuilder_CheckExact(self->target)) {
2857 PyErr_SetString(
2858 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01002859 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002860 "targets"
2861 );
2862 return NULL;
2863 }
2864
2865 target = (TreeBuilderObject*) self->target;
2866
2867 Py_INCREF(events);
2868 Py_XDECREF(target->events);
2869 target->events = events;
2870
2871 /* clear out existing events */
2872 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2873 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2874 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2875 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2876
2877 if (event_set == Py_None) {
2878 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002879 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002880 Py_RETURN_NONE;
2881 }
2882
2883 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2884 goto error;
2885
2886 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2887 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2888 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002889 if (PyUnicode_Check(item)) {
2890 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002891 if (event == NULL)
2892 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002893 } else if (PyBytes_Check(item))
2894 event = PyBytes_AS_STRING(item);
2895 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002896 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002897 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002898 if (strcmp(event, "start") == 0) {
2899 Py_INCREF(item);
2900 target->start_event_obj = item;
2901 } else if (strcmp(event, "end") == 0) {
2902 Py_INCREF(item);
2903 Py_XDECREF(target->end_event_obj);
2904 target->end_event_obj = item;
2905 } else if (strcmp(event, "start-ns") == 0) {
2906 Py_INCREF(item);
2907 Py_XDECREF(target->start_ns_event_obj);
2908 target->start_ns_event_obj = item;
2909 EXPAT(SetNamespaceDeclHandler)(
2910 self->parser,
2911 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2912 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2913 );
2914 } else if (strcmp(event, "end-ns") == 0) {
2915 Py_INCREF(item);
2916 Py_XDECREF(target->end_ns_event_obj);
2917 target->end_ns_event_obj = item;
2918 EXPAT(SetNamespaceDeclHandler)(
2919 self->parser,
2920 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2921 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2922 );
2923 } else {
2924 PyErr_Format(
2925 PyExc_ValueError,
2926 "unknown event '%s'", event
2927 );
2928 return NULL;
2929 }
2930 }
2931
2932 Py_RETURN_NONE;
2933
2934 error:
2935 PyErr_SetString(
2936 PyExc_TypeError,
2937 "invalid event tuple"
2938 );
2939 return NULL;
2940}
2941
2942static PyMethodDef xmlparser_methods[] = {
2943 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2944 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2945 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2946 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2947 {NULL, NULL}
2948};
2949
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002950static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002951xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002952{
Alexander Belopolskye239d232010-12-08 23:31:48 +00002953 if (PyUnicode_Check(nameobj)) {
2954 PyObject* res;
2955 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
2956 res = self->entity;
2957 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
2958 res = self->target;
2959 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
2960 return PyUnicode_FromFormat(
2961 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002962 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00002963 }
2964 else
2965 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002966
Alexander Belopolskye239d232010-12-08 23:31:48 +00002967 Py_INCREF(res);
2968 return res;
2969 }
2970 generic:
2971 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002972}
2973
Neal Norwitz227b5332006-03-22 09:28:35 +00002974static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002975 PyVarObject_HEAD_INIT(NULL, 0)
2976 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002977 /* methods */
2978 (destructor)xmlparser_dealloc, /* tp_dealloc */
2979 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002980 0, /* tp_getattr */
2981 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002982 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002983 0, /* tp_repr */
2984 0, /* tp_as_number */
2985 0, /* tp_as_sequence */
2986 0, /* tp_as_mapping */
2987 0, /* tp_hash */
2988 0, /* tp_call */
2989 0, /* tp_str */
2990 (getattrofunc)xmlparser_getattro, /* tp_getattro */
2991 0, /* tp_setattro */
2992 0, /* tp_as_buffer */
2993 Py_TPFLAGS_DEFAULT, /* tp_flags */
2994 0, /* tp_doc */
2995 0, /* tp_traverse */
2996 0, /* tp_clear */
2997 0, /* tp_richcompare */
2998 0, /* tp_weaklistoffset */
2999 0, /* tp_iter */
3000 0, /* tp_iternext */
3001 xmlparser_methods, /* tp_methods */
3002 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003003};
3004
3005#endif
3006
3007/* ==================================================================== */
3008/* python module interface */
3009
3010static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003011 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
3012 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
3013#if defined(USE_EXPAT)
3014 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003015#endif
3016 {NULL, NULL}
3017};
3018
Martin v. Löwis1a214512008-06-11 05:26:20 +00003019
3020static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003021 PyModuleDef_HEAD_INIT,
3022 "_elementtree",
3023 NULL,
3024 -1,
3025 _functions,
3026 NULL,
3027 NULL,
3028 NULL,
3029 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003030};
3031
Neal Norwitzf6657e62006-12-28 04:47:50 +00003032PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003033PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003034{
3035 PyObject* m;
3036 PyObject* g;
3037 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003038
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003039 /* Initialize object types */
3040 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003041 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003042 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003043 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003044#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003045 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003046 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003047#endif
3048
Martin v. Löwis1a214512008-06-11 05:26:20 +00003049 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003050 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003051 return NULL;
3052
3053 /* The code below requires that the module gets already added
3054 to sys.modules. */
3055 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003056 _elementtreemodule.m_name,
3057 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003058
3059 /* python glue code */
3060
3061 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003062 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003063 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003064
3065 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
3066
3067 bootstrap = (
3068
Florent Xiclunaf4bdf4e2012-02-11 11:28:16 +01003069 "from copy import deepcopy\n"
3070 "from xml.etree import ElementPath\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003071
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003072 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003073 " if tag == '*':\n"
3074 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003075 " if tag is None or node.tag == tag:\n"
3076 " yield node\n"
3077 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003078 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003080
3081 "def itertext(node):\n" /* helper */
3082 " if node.text:\n"
3083 " yield node.text\n"
3084 " for e in node:\n"
3085 " for s in e.itertext():\n"
3086 " yield s\n"
3087 " if e.tail:\n"
3088 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003089
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003090 );
3091
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003092 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3093 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003094
3095 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003096 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003097 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3098 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003099
3100#if defined(USE_PYEXPAT_CAPI)
3101 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003102 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3103 if (expat_capi) {
3104 /* check that it's usable */
3105 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3106 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3107 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3108 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3109 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3110 expat_capi = NULL;
3111 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003112#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003113
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003114 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003115 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003116 );
3117 Py_INCREF(elementtree_parseerror_obj);
3118 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3119
Eli Bendersky092af1f2012-03-04 07:14:03 +02003120 Py_INCREF((PyObject *)&Element_Type);
3121 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3122
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003123 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003124}