blob: 788772113c4e0d12b5fd94fe9eacda74e8f1025a [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000052#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000053
54/* -------------------------------------------------------------------- */
55/* configuration */
56
57/* Leave defined to include the expat-based XMLParser type */
58#define USE_EXPAT
59
Florent Xiclunaf15351d2010-03-13 23:24:31 +000060/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* #define USE_PYEXPAT_CAPI */
62
63/* An element can hold this many children without extra memory
64 allocations. */
65#define STATIC_CHILDREN 4
66
67/* For best performance, chose a value so that 80-90% of all nodes
68 have no more than the given number of children. Set this to zero
69 to minimize the size of the element structure itself (this only
70 helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
73 eight bytes. For the current version of cElementTree, this means
74 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
79#if 0
80static int memory = 0;
81#define ALLOC(size, comment)\
82do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
83#define RELEASE(size, comment)\
84do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
85#else
86#define ALLOC(size, comment)
87#define RELEASE(size, comment)
88#endif
89
90/* compiler tweaks */
91#if defined(_MSC_VER)
92#define LOCAL(type) static __inline type __fastcall
93#else
94#define LOCAL(type) static type
95#endif
96
Florent Xiclunaf15351d2010-03-13 23:24:31 +000097/* compatibility macros */
98#if (PY_VERSION_HEX < 0x02060000)
99#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
100#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
101#endif
102
103#if (PY_VERSION_HEX < 0x02050000)
104typedef int Py_ssize_t;
105#define lenfunc inquiry
106#endif
107
108#if (PY_VERSION_HEX < 0x02040000)
109#define PyDict_CheckExact PyDict_Check
110
111#if !defined(Py_RETURN_NONE)
112#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
113#endif
114#endif
115
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000116/* macros used to store 'join' flags in string object pointers. note
117 that all use of text and tail as object pointers must be wrapped in
118 JOIN_OBJ. see comments in the ElementObject definition for more
119 info. */
120#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
121#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
122#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
123
124/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000125static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000128static PyObject* elementtree_iter_obj;
129static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130static PyObject* elementpath_obj;
131
132/* helpers */
133
134LOCAL(PyObject*)
135deepcopy(PyObject* object, PyObject* memo)
136{
137 /* do a deep copy of the given object */
138
139 PyObject* args;
140 PyObject* result;
141
142 if (!elementtree_deepcopy_obj) {
143 PyErr_SetString(
144 PyExc_RuntimeError,
145 "deepcopy helper not found"
146 );
147 return NULL;
148 }
149
150 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000151 if (!args)
152 return NULL;
153
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
155 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156
157 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158
159 Py_DECREF(args);
160
161 return result;
162}
163
164LOCAL(PyObject*)
165list_join(PyObject* list)
166{
167 /* join list elements (destroying the list in the process) */
168
169 PyObject* joiner;
170 PyObject* function;
171 PyObject* args;
172 PyObject* result;
173
174 switch (PyList_GET_SIZE(list)) {
175 case 0:
176 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000177 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000178 case 1:
179 result = PyList_GET_ITEM(list, 0);
180 Py_INCREF(result);
181 Py_DECREF(list);
182 return result;
183 }
184
185 /* two or more elements: slice out a suitable separator from the
186 first member, and use that to join the entire list */
187
188 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
189 if (!joiner)
190 return NULL;
191
192 function = PyObject_GetAttrString(joiner, "join");
193 if (!function) {
194 Py_DECREF(joiner);
195 return NULL;
196 }
197
198 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000199 if (!args)
200 return NULL;
201
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202 PyTuple_SET_ITEM(args, 0, list);
203
204 result = PyObject_CallObject(function, args);
205
206 Py_DECREF(args); /* also removes list */
207 Py_DECREF(function);
208 Py_DECREF(joiner);
209
210 return result;
211}
212
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000213/* -------------------------------------------------------------------- */
214/* the element type */
215
216typedef struct {
217
218 /* attributes (a dictionary object), or None if no attributes */
219 PyObject* attrib;
220
221 /* child elements */
222 int length; /* actual number of items */
223 int allocated; /* allocated items */
224
225 /* this either points to _children or to a malloced buffer */
226 PyObject* *children;
227
228 PyObject* _children[STATIC_CHILDREN];
229
230} ElementObjectExtra;
231
232typedef struct {
233 PyObject_HEAD
234
235 /* element tag (a string). */
236 PyObject* tag;
237
238 /* text before first child. note that this is a tagged pointer;
239 use JOIN_OBJ to get the object pointer. the join flag is used
240 to distinguish lists created by the tree builder from lists
241 assigned to the attribute by application code; the former
242 should be joined before being returned to the user, the latter
243 should be left intact. */
244 PyObject* text;
245
246 /* text after this element, in parent. note that this is a tagged
247 pointer; use JOIN_OBJ to get the object pointer. */
248 PyObject* tail;
249
250 ElementObjectExtra* extra;
251
252} ElementObject;
253
Neal Norwitz227b5332006-03-22 09:28:35 +0000254static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000255
Christian Heimes90aa7642007-12-19 02:45:37 +0000256#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000257
258/* -------------------------------------------------------------------- */
259/* element constructor and destructor */
260
261LOCAL(int)
262element_new_extra(ElementObject* self, PyObject* attrib)
263{
264 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
265 if (!self->extra)
266 return -1;
267
268 if (!attrib)
269 attrib = Py_None;
270
271 Py_INCREF(attrib);
272 self->extra->attrib = attrib;
273
274 self->extra->length = 0;
275 self->extra->allocated = STATIC_CHILDREN;
276 self->extra->children = self->extra->_children;
277
278 return 0;
279}
280
281LOCAL(void)
282element_dealloc_extra(ElementObject* self)
283{
284 int i;
285
286 Py_DECREF(self->extra->attrib);
287
288 for (i = 0; i < self->extra->length; i++)
289 Py_DECREF(self->extra->children[i]);
290
291 if (self->extra->children != self->extra->_children)
292 PyObject_Free(self->extra->children);
293
294 PyObject_Free(self->extra);
295}
296
297LOCAL(PyObject*)
298element_new(PyObject* tag, PyObject* attrib)
299{
300 ElementObject* self;
301
302 self = PyObject_New(ElementObject, &Element_Type);
303 if (self == NULL)
304 return NULL;
305
306 /* use None for empty dictionaries */
307 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
308 attrib = Py_None;
309
310 self->extra = NULL;
311
312 if (attrib != Py_None) {
313
Thomas Wouters477c8d52006-05-27 19:21:47 +0000314 if (element_new_extra(self, attrib) < 0) {
315 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000316 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000317 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000318
319 self->extra->length = 0;
320 self->extra->allocated = STATIC_CHILDREN;
321 self->extra->children = self->extra->_children;
322
323 }
324
325 Py_INCREF(tag);
326 self->tag = tag;
327
328 Py_INCREF(Py_None);
329 self->text = Py_None;
330
331 Py_INCREF(Py_None);
332 self->tail = Py_None;
333
334 ALLOC(sizeof(ElementObject), "create element");
335
336 return (PyObject*) self;
337}
338
339LOCAL(int)
340element_resize(ElementObject* self, int extra)
341{
342 int size;
343 PyObject* *children;
344
345 /* make sure self->children can hold the given number of extra
346 elements. set an exception and return -1 if allocation failed */
347
348 if (!self->extra)
349 element_new_extra(self, NULL);
350
351 size = self->extra->length + extra;
352
353 if (size > self->extra->allocated) {
354 /* use Python 2.4's list growth strategy */
355 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000356 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
357 * which needs at least 4 bytes.
358 * Although it's a false alarm always assume at least one child to
359 * be safe.
360 */
361 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000362 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000363 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
364 * "children", which needs at least 4 bytes. Although it's a
365 * false alarm always assume at least one child to be safe.
366 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000367 children = PyObject_Realloc(self->extra->children,
368 size * sizeof(PyObject*));
369 if (!children)
370 goto nomemory;
371 } else {
372 children = PyObject_Malloc(size * sizeof(PyObject*));
373 if (!children)
374 goto nomemory;
375 /* copy existing children from static area to malloc buffer */
376 memcpy(children, self->extra->children,
377 self->extra->length * sizeof(PyObject*));
378 }
379 self->extra->children = children;
380 self->extra->allocated = size;
381 }
382
383 return 0;
384
385 nomemory:
386 PyErr_NoMemory();
387 return -1;
388}
389
390LOCAL(int)
391element_add_subelement(ElementObject* self, PyObject* element)
392{
393 /* add a child element to a parent */
394
395 if (element_resize(self, 1) < 0)
396 return -1;
397
398 Py_INCREF(element);
399 self->extra->children[self->extra->length] = element;
400
401 self->extra->length++;
402
403 return 0;
404}
405
406LOCAL(PyObject*)
407element_get_attrib(ElementObject* self)
408{
409 /* return borrowed reference to attrib dictionary */
410 /* note: this function assumes that the extra section exists */
411
412 PyObject* res = self->extra->attrib;
413
414 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000415 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000416 /* create missing dictionary */
417 res = PyDict_New();
418 if (!res)
419 return NULL;
420 self->extra->attrib = res;
421 }
422
423 return res;
424}
425
426LOCAL(PyObject*)
427element_get_text(ElementObject* self)
428{
429 /* return borrowed reference to text attribute */
430
431 PyObject* res = self->text;
432
433 if (JOIN_GET(res)) {
434 res = JOIN_OBJ(res);
435 if (PyList_CheckExact(res)) {
436 res = list_join(res);
437 if (!res)
438 return NULL;
439 self->text = res;
440 }
441 }
442
443 return res;
444}
445
446LOCAL(PyObject*)
447element_get_tail(ElementObject* self)
448{
449 /* return borrowed reference to text attribute */
450
451 PyObject* res = self->tail;
452
453 if (JOIN_GET(res)) {
454 res = JOIN_OBJ(res);
455 if (PyList_CheckExact(res)) {
456 res = list_join(res);
457 if (!res)
458 return NULL;
459 self->tail = res;
460 }
461 }
462
463 return res;
464}
465
466static PyObject*
467element(PyObject* self, PyObject* args, PyObject* kw)
468{
469 PyObject* elem;
470
471 PyObject* tag;
472 PyObject* attrib = NULL;
473 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
474 &PyDict_Type, &attrib))
475 return NULL;
476
477 if (attrib || kw) {
478 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
479 if (!attrib)
480 return NULL;
481 if (kw)
482 PyDict_Update(attrib, kw);
483 } else {
484 Py_INCREF(Py_None);
485 attrib = Py_None;
486 }
487
488 elem = element_new(tag, attrib);
489
490 Py_DECREF(attrib);
491
492 return elem;
493}
494
495static PyObject*
496subelement(PyObject* self, PyObject* args, PyObject* kw)
497{
498 PyObject* elem;
499
500 ElementObject* parent;
501 PyObject* tag;
502 PyObject* attrib = NULL;
503 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
504 &Element_Type, &parent, &tag,
505 &PyDict_Type, &attrib))
506 return NULL;
507
508 if (attrib || kw) {
509 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
510 if (!attrib)
511 return NULL;
512 if (kw)
513 PyDict_Update(attrib, kw);
514 } else {
515 Py_INCREF(Py_None);
516 attrib = Py_None;
517 }
518
519 elem = element_new(tag, attrib);
520
521 Py_DECREF(attrib);
522
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000523 if (element_add_subelement(parent, elem) < 0) {
524 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000525 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000526 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000527
528 return elem;
529}
530
531static void
532element_dealloc(ElementObject* self)
533{
534 if (self->extra)
535 element_dealloc_extra(self);
536
537 /* discard attributes */
538 Py_DECREF(self->tag);
539 Py_DECREF(JOIN_OBJ(self->text));
540 Py_DECREF(JOIN_OBJ(self->tail));
541
542 RELEASE(sizeof(ElementObject), "destroy element");
543
544 PyObject_Del(self);
545}
546
547/* -------------------------------------------------------------------- */
548/* methods (in alphabetical order) */
549
550static PyObject*
551element_append(ElementObject* self, PyObject* args)
552{
553 PyObject* element;
554 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
555 return NULL;
556
557 if (element_add_subelement(self, element) < 0)
558 return NULL;
559
560 Py_RETURN_NONE;
561}
562
563static PyObject*
564element_clear(ElementObject* self, PyObject* args)
565{
566 if (!PyArg_ParseTuple(args, ":clear"))
567 return NULL;
568
569 if (self->extra) {
570 element_dealloc_extra(self);
571 self->extra = NULL;
572 }
573
574 Py_INCREF(Py_None);
575 Py_DECREF(JOIN_OBJ(self->text));
576 self->text = Py_None;
577
578 Py_INCREF(Py_None);
579 Py_DECREF(JOIN_OBJ(self->tail));
580 self->tail = Py_None;
581
582 Py_RETURN_NONE;
583}
584
585static PyObject*
586element_copy(ElementObject* self, PyObject* args)
587{
588 int i;
589 ElementObject* element;
590
591 if (!PyArg_ParseTuple(args, ":__copy__"))
592 return NULL;
593
594 element = (ElementObject*) element_new(
595 self->tag, (self->extra) ? self->extra->attrib : Py_None
596 );
597 if (!element)
598 return NULL;
599
600 Py_DECREF(JOIN_OBJ(element->text));
601 element->text = self->text;
602 Py_INCREF(JOIN_OBJ(element->text));
603
604 Py_DECREF(JOIN_OBJ(element->tail));
605 element->tail = self->tail;
606 Py_INCREF(JOIN_OBJ(element->tail));
607
608 if (self->extra) {
609
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000610 if (element_resize(element, self->extra->length) < 0) {
611 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000612 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000613 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614
615 for (i = 0; i < self->extra->length; i++) {
616 Py_INCREF(self->extra->children[i]);
617 element->extra->children[i] = self->extra->children[i];
618 }
619
620 element->extra->length = self->extra->length;
621
622 }
623
624 return (PyObject*) element;
625}
626
627static PyObject*
628element_deepcopy(ElementObject* self, PyObject* args)
629{
630 int i;
631 ElementObject* element;
632 PyObject* tag;
633 PyObject* attrib;
634 PyObject* text;
635 PyObject* tail;
636 PyObject* id;
637
638 PyObject* memo;
639 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
640 return NULL;
641
642 tag = deepcopy(self->tag, memo);
643 if (!tag)
644 return NULL;
645
646 if (self->extra) {
647 attrib = deepcopy(self->extra->attrib, memo);
648 if (!attrib) {
649 Py_DECREF(tag);
650 return NULL;
651 }
652 } else {
653 Py_INCREF(Py_None);
654 attrib = Py_None;
655 }
656
657 element = (ElementObject*) element_new(tag, attrib);
658
659 Py_DECREF(tag);
660 Py_DECREF(attrib);
661
662 if (!element)
663 return NULL;
664
665 text = deepcopy(JOIN_OBJ(self->text), memo);
666 if (!text)
667 goto error;
668 Py_DECREF(element->text);
669 element->text = JOIN_SET(text, JOIN_GET(self->text));
670
671 tail = deepcopy(JOIN_OBJ(self->tail), memo);
672 if (!tail)
673 goto error;
674 Py_DECREF(element->tail);
675 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
676
677 if (self->extra) {
678
679 if (element_resize(element, self->extra->length) < 0)
680 goto error;
681
682 for (i = 0; i < self->extra->length; i++) {
683 PyObject* child = deepcopy(self->extra->children[i], memo);
684 if (!child) {
685 element->extra->length = i;
686 goto error;
687 }
688 element->extra->children[i] = child;
689 }
690
691 element->extra->length = self->extra->length;
692
693 }
694
695 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000696 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000697 if (!id)
698 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699
700 i = PyDict_SetItem(memo, id, (PyObject*) element);
701
702 Py_DECREF(id);
703
704 if (i < 0)
705 goto error;
706
707 return (PyObject*) element;
708
709 error:
710 Py_DECREF(element);
711 return NULL;
712}
713
714LOCAL(int)
715checkpath(PyObject* tag)
716{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000717 Py_ssize_t i;
718 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 /* check if a tag contains an xpath character */
721
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000722#define PATHCHAR(ch) \
723 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000724
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725 if (PyUnicode_Check(tag)) {
726 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
727 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
728 if (p[i] == '{')
729 check = 0;
730 else if (p[i] == '}')
731 check = 1;
732 else if (check && PATHCHAR(p[i]))
733 return 1;
734 }
735 return 0;
736 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000737 if (PyBytes_Check(tag)) {
738 char *p = PyBytes_AS_STRING(tag);
739 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000740 if (p[i] == '{')
741 check = 0;
742 else if (p[i] == '}')
743 check = 1;
744 else if (check && PATHCHAR(p[i]))
745 return 1;
746 }
747 return 0;
748 }
749
750 return 1; /* unknown type; might be path expression */
751}
752
753static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000754element_extend(ElementObject* self, PyObject* args)
755{
756 PyObject* seq;
757 Py_ssize_t i, seqlen = 0;
758
759 PyObject* seq_in;
760 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
761 return NULL;
762
763 seq = PySequence_Fast(seq_in, "");
764 if (!seq) {
765 PyErr_Format(
766 PyExc_TypeError,
767 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
768 );
769 return NULL;
770 }
771
772 seqlen = PySequence_Size(seq);
773 for (i = 0; i < seqlen; i++) {
774 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
775 if (element_add_subelement(self, element) < 0) {
776 Py_DECREF(seq);
777 return NULL;
778 }
779 }
780
781 Py_DECREF(seq);
782
783 Py_RETURN_NONE;
784}
785
786static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000787element_find(ElementObject* self, PyObject* args)
788{
789 int i;
790
791 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000792 PyObject* namespaces = Py_None;
793 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000794 return NULL;
795
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000796 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000797 return PyObject_CallMethod(
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000798 elementpath_obj, "find", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000799 );
800
801 if (!self->extra)
802 Py_RETURN_NONE;
803
804 for (i = 0; i < self->extra->length; i++) {
805 PyObject* item = self->extra->children[i];
806 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000807 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000808 Py_INCREF(item);
809 return item;
810 }
811 }
812
813 Py_RETURN_NONE;
814}
815
816static PyObject*
817element_findtext(ElementObject* self, PyObject* args)
818{
819 int i;
820
821 PyObject* tag;
822 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000823 PyObject* namespaces = Py_None;
824 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000825 return NULL;
826
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000827 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000828 return PyObject_CallMethod(
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000829 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000830 );
831
832 if (!self->extra) {
833 Py_INCREF(default_value);
834 return default_value;
835 }
836
837 for (i = 0; i < self->extra->length; i++) {
838 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000839 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
840
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000841 PyObject* text = element_get_text(item);
842 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000843 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000844 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000845 return text;
846 }
847 }
848
849 Py_INCREF(default_value);
850 return default_value;
851}
852
853static PyObject*
854element_findall(ElementObject* self, PyObject* args)
855{
856 int i;
857 PyObject* out;
858
859 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000860 PyObject* namespaces = Py_None;
861 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000862 return NULL;
863
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000864 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000865 return PyObject_CallMethod(
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000866 elementpath_obj, "findall", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000867 );
868
869 out = PyList_New(0);
870 if (!out)
871 return NULL;
872
873 if (!self->extra)
874 return out;
875
876 for (i = 0; i < self->extra->length; i++) {
877 PyObject* item = self->extra->children[i];
878 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000879 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000880 if (PyList_Append(out, item) < 0) {
881 Py_DECREF(out);
882 return NULL;
883 }
884 }
885 }
886
887 return out;
888}
889
890static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000891element_iterfind(ElementObject* self, PyObject* args)
892{
893 PyObject* tag;
894 PyObject* namespaces = Py_None;
895 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
896 return NULL;
897
898 return PyObject_CallMethod(
899 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
900 );
901}
902
903static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000904element_get(ElementObject* self, PyObject* args)
905{
906 PyObject* value;
907
908 PyObject* key;
909 PyObject* default_value = Py_None;
910 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
911 return NULL;
912
913 if (!self->extra || self->extra->attrib == Py_None)
914 value = default_value;
915 else {
916 value = PyDict_GetItem(self->extra->attrib, key);
917 if (!value)
918 value = default_value;
919 }
920
921 Py_INCREF(value);
922 return value;
923}
924
925static PyObject*
926element_getchildren(ElementObject* self, PyObject* args)
927{
928 int i;
929 PyObject* list;
930
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000931 /* FIXME: report as deprecated? */
932
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000933 if (!PyArg_ParseTuple(args, ":getchildren"))
934 return NULL;
935
936 if (!self->extra)
937 return PyList_New(0);
938
939 list = PyList_New(self->extra->length);
940 if (!list)
941 return NULL;
942
943 for (i = 0; i < self->extra->length; i++) {
944 PyObject* item = self->extra->children[i];
945 Py_INCREF(item);
946 PyList_SET_ITEM(list, i, item);
947 }
948
949 return list;
950}
951
952static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000953element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000954{
955 PyObject* result;
956
957 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000958 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000959 return NULL;
960
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000961 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000962 PyErr_SetString(
963 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000964 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000965 );
966 return NULL;
967 }
968
969 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000970 if (!args)
971 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000972
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000973 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
974 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
975
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000976 result = PyObject_CallObject(elementtree_iter_obj, args);
977
978 Py_DECREF(args);
979
980 return result;
981}
982
983
984static PyObject*
985element_itertext(ElementObject* self, PyObject* args)
986{
987 PyObject* result;
988
989 if (!PyArg_ParseTuple(args, ":itertext"))
990 return NULL;
991
992 if (!elementtree_itertext_obj) {
993 PyErr_SetString(
994 PyExc_RuntimeError,
995 "itertext helper not found"
996 );
997 return NULL;
998 }
999
1000 args = PyTuple_New(1);
1001 if (!args)
1002 return NULL;
1003
1004 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1005
1006 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001007
1008 Py_DECREF(args);
1009
1010 return result;
1011}
1012
1013static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001014element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001015{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001016 ElementObject* self = (ElementObject*) self_;
1017
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001018 if (!self->extra || index < 0 || index >= self->extra->length) {
1019 PyErr_SetString(
1020 PyExc_IndexError,
1021 "child index out of range"
1022 );
1023 return NULL;
1024 }
1025
1026 Py_INCREF(self->extra->children[index]);
1027 return self->extra->children[index];
1028}
1029
1030static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001031element_insert(ElementObject* self, PyObject* args)
1032{
1033 int i;
1034
1035 int index;
1036 PyObject* element;
1037 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1038 &Element_Type, &element))
1039 return NULL;
1040
1041 if (!self->extra)
1042 element_new_extra(self, NULL);
1043
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001044 if (index < 0) {
1045 index += self->extra->length;
1046 if (index < 0)
1047 index = 0;
1048 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001049 if (index > self->extra->length)
1050 index = self->extra->length;
1051
1052 if (element_resize(self, 1) < 0)
1053 return NULL;
1054
1055 for (i = self->extra->length; i > index; i--)
1056 self->extra->children[i] = self->extra->children[i-1];
1057
1058 Py_INCREF(element);
1059 self->extra->children[index] = element;
1060
1061 self->extra->length++;
1062
1063 Py_RETURN_NONE;
1064}
1065
1066static PyObject*
1067element_items(ElementObject* self, PyObject* args)
1068{
1069 if (!PyArg_ParseTuple(args, ":items"))
1070 return NULL;
1071
1072 if (!self->extra || self->extra->attrib == Py_None)
1073 return PyList_New(0);
1074
1075 return PyDict_Items(self->extra->attrib);
1076}
1077
1078static PyObject*
1079element_keys(ElementObject* self, PyObject* args)
1080{
1081 if (!PyArg_ParseTuple(args, ":keys"))
1082 return NULL;
1083
1084 if (!self->extra || self->extra->attrib == Py_None)
1085 return PyList_New(0);
1086
1087 return PyDict_Keys(self->extra->attrib);
1088}
1089
Martin v. Löwis18e16552006-02-15 17:27:45 +00001090static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001091element_length(ElementObject* self)
1092{
1093 if (!self->extra)
1094 return 0;
1095
1096 return self->extra->length;
1097}
1098
1099static PyObject*
1100element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1101{
1102 PyObject* elem;
1103
1104 PyObject* tag;
1105 PyObject* attrib;
1106 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1107 return NULL;
1108
1109 attrib = PyDict_Copy(attrib);
1110 if (!attrib)
1111 return NULL;
1112
1113 elem = element_new(tag, attrib);
1114
1115 Py_DECREF(attrib);
1116
1117 return elem;
1118}
1119
1120static PyObject*
1121element_reduce(ElementObject* self, PyObject* args)
1122{
1123 if (!PyArg_ParseTuple(args, ":__reduce__"))
1124 return NULL;
1125
1126 /* Hack alert: This method is used to work around a __copy__
1127 problem on certain 2.3 and 2.4 versions. To save time and
1128 simplify the code, we create the copy in here, and use a dummy
1129 copyelement helper to trick the copy module into doing the
1130 right thing. */
1131
1132 if (!elementtree_copyelement_obj) {
1133 PyErr_SetString(
1134 PyExc_RuntimeError,
1135 "copyelement helper not found"
1136 );
1137 return NULL;
1138 }
1139
1140 return Py_BuildValue(
1141 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1142 );
1143}
1144
1145static PyObject*
1146element_remove(ElementObject* self, PyObject* args)
1147{
1148 int i;
1149
1150 PyObject* element;
1151 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1152 return NULL;
1153
1154 if (!self->extra) {
1155 /* element has no children, so raise exception */
1156 PyErr_SetString(
1157 PyExc_ValueError,
1158 "list.remove(x): x not in list"
1159 );
1160 return NULL;
1161 }
1162
1163 for (i = 0; i < self->extra->length; i++) {
1164 if (self->extra->children[i] == element)
1165 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001166 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001167 break;
1168 }
1169
1170 if (i == self->extra->length) {
1171 /* element is not in children, so raise exception */
1172 PyErr_SetString(
1173 PyExc_ValueError,
1174 "list.remove(x): x not in list"
1175 );
1176 return NULL;
1177 }
1178
1179 Py_DECREF(self->extra->children[i]);
1180
1181 self->extra->length--;
1182
1183 for (; i < self->extra->length; i++)
1184 self->extra->children[i] = self->extra->children[i+1];
1185
1186 Py_RETURN_NONE;
1187}
1188
1189static PyObject*
1190element_repr(ElementObject* self)
1191{
Walter Dörwald7569dfe2007-05-19 21:49:49 +00001192 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001193}
1194
1195static PyObject*
1196element_set(ElementObject* self, PyObject* args)
1197{
1198 PyObject* attrib;
1199
1200 PyObject* key;
1201 PyObject* value;
1202 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1203 return NULL;
1204
1205 if (!self->extra)
1206 element_new_extra(self, NULL);
1207
1208 attrib = element_get_attrib(self);
1209 if (!attrib)
1210 return NULL;
1211
1212 if (PyDict_SetItem(attrib, key, value) < 0)
1213 return NULL;
1214
1215 Py_RETURN_NONE;
1216}
1217
1218static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001219element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001220{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001221 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001222 int i;
1223 PyObject* old;
1224
1225 if (!self->extra || index < 0 || index >= self->extra->length) {
1226 PyErr_SetString(
1227 PyExc_IndexError,
1228 "child assignment index out of range");
1229 return -1;
1230 }
1231
1232 old = self->extra->children[index];
1233
1234 if (item) {
1235 Py_INCREF(item);
1236 self->extra->children[index] = item;
1237 } else {
1238 self->extra->length--;
1239 for (i = index; i < self->extra->length; i++)
1240 self->extra->children[i] = self->extra->children[i+1];
1241 }
1242
1243 Py_DECREF(old);
1244
1245 return 0;
1246}
1247
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001248static PyObject*
1249element_subscr(PyObject* self_, PyObject* item)
1250{
1251 ElementObject* self = (ElementObject*) self_;
1252
1253#if (PY_VERSION_HEX < 0x02050000)
1254 if (PyInt_Check(item) || PyLong_Check(item)) {
1255 long i = PyInt_AsLong(item);
1256#else
1257 if (PyIndex_Check(item)) {
1258 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1259#endif
1260
1261 if (i == -1 && PyErr_Occurred()) {
1262 return NULL;
1263 }
1264 if (i < 0 && self->extra)
1265 i += self->extra->length;
1266 return element_getitem(self_, i);
1267 }
1268 else if (PySlice_Check(item)) {
1269 Py_ssize_t start, stop, step, slicelen, cur, i;
1270 PyObject* list;
1271
1272 if (!self->extra)
1273 return PyList_New(0);
1274
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001275 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001276 self->extra->length,
1277 &start, &stop, &step, &slicelen) < 0) {
1278 return NULL;
1279 }
1280
1281 if (slicelen <= 0)
1282 return PyList_New(0);
1283 else {
1284 list = PyList_New(slicelen);
1285 if (!list)
1286 return NULL;
1287
1288 for (cur = start, i = 0; i < slicelen;
1289 cur += step, i++) {
1290 PyObject* item = self->extra->children[cur];
1291 Py_INCREF(item);
1292 PyList_SET_ITEM(list, i, item);
1293 }
1294
1295 return list;
1296 }
1297 }
1298 else {
1299 PyErr_SetString(PyExc_TypeError,
1300 "element indices must be integers");
1301 return NULL;
1302 }
1303}
1304
1305static int
1306element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1307{
1308 ElementObject* self = (ElementObject*) self_;
1309
1310#if (PY_VERSION_HEX < 0x02050000)
1311 if (PyInt_Check(item) || PyLong_Check(item)) {
1312 long i = PyInt_AsLong(item);
1313#else
1314 if (PyIndex_Check(item)) {
1315 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1316#endif
1317
1318 if (i == -1 && PyErr_Occurred()) {
1319 return -1;
1320 }
1321 if (i < 0 && self->extra)
1322 i += self->extra->length;
1323 return element_setitem(self_, i, value);
1324 }
1325 else if (PySlice_Check(item)) {
1326 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1327
1328 PyObject* recycle = NULL;
1329 PyObject* seq = NULL;
1330
1331 if (!self->extra)
1332 element_new_extra(self, NULL);
1333
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001334 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001335 self->extra->length,
1336 &start, &stop, &step, &slicelen) < 0) {
1337 return -1;
1338 }
1339
1340 if (value == NULL)
1341 newlen = 0;
1342 else {
1343 seq = PySequence_Fast(value, "");
1344 if (!seq) {
1345 PyErr_Format(
1346 PyExc_TypeError,
1347 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1348 );
1349 return -1;
1350 }
1351 newlen = PySequence_Size(seq);
1352 }
1353
1354 if (step != 1 && newlen != slicelen)
1355 {
1356 PyErr_Format(PyExc_ValueError,
1357#if (PY_VERSION_HEX < 0x02050000)
1358 "attempt to assign sequence of size %d "
1359 "to extended slice of size %d",
1360#else
1361 "attempt to assign sequence of size %zd "
1362 "to extended slice of size %zd",
1363#endif
1364 newlen, slicelen
1365 );
1366 return -1;
1367 }
1368
1369
1370 /* Resize before creating the recycle bin, to prevent refleaks. */
1371 if (newlen > slicelen) {
1372 if (element_resize(self, newlen - slicelen) < 0) {
1373 if (seq) {
1374 Py_DECREF(seq);
1375 }
1376 return -1;
1377 }
1378 }
1379
1380 if (slicelen > 0) {
1381 /* to avoid recursive calls to this method (via decref), move
1382 old items to the recycle bin here, and get rid of them when
1383 we're done modifying the element */
1384 recycle = PyList_New(slicelen);
1385 if (!recycle) {
1386 if (seq) {
1387 Py_DECREF(seq);
1388 }
1389 return -1;
1390 }
1391 for (cur = start, i = 0; i < slicelen;
1392 cur += step, i++)
1393 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1394 }
1395
1396 if (newlen < slicelen) {
1397 /* delete slice */
1398 for (i = stop; i < self->extra->length; i++)
1399 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1400 } else if (newlen > slicelen) {
1401 /* insert slice */
1402 for (i = self->extra->length-1; i >= stop; i--)
1403 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1404 }
1405
1406 /* replace the slice */
1407 for (cur = start, i = 0; i < newlen;
1408 cur += step, i++) {
1409 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1410 Py_INCREF(element);
1411 self->extra->children[cur] = element;
1412 }
1413
1414 self->extra->length += newlen - slicelen;
1415
1416 if (seq) {
1417 Py_DECREF(seq);
1418 }
1419
1420 /* discard the recycle bin, and everything in it */
1421 Py_XDECREF(recycle);
1422
1423 return 0;
1424 }
1425 else {
1426 PyErr_SetString(PyExc_TypeError,
1427 "element indices must be integers");
1428 return -1;
1429 }
1430}
1431
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001432static PyMethodDef element_methods[] = {
1433
1434 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1435
1436 {"get", (PyCFunction) element_get, METH_VARARGS},
1437 {"set", (PyCFunction) element_set, METH_VARARGS},
1438
1439 {"find", (PyCFunction) element_find, METH_VARARGS},
1440 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1441 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1442
1443 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001444 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001445 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1446 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1447
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001448 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1449 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1450 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1451
1452 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001453 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1454
1455 {"items", (PyCFunction) element_items, METH_VARARGS},
1456 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1457
1458 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1459
1460 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1461 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1462
1463 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1464 C objects correctly, so we have to fake it using a __reduce__-
1465 based hack (see the element_reduce implementation above for
1466 details). */
1467
1468 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1469 using a runtime test to figure out if we need to fake things
1470 or now (see the init code below). The following entry is
1471 enabled only if the hack is needed. */
1472
1473 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1474
1475 {NULL, NULL}
1476};
1477
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001478static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001479element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001480{
1481 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001482 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001483
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001484 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001485 name = _PyUnicode_AsString(nameobj);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001486
1487 if (name == NULL)
1488 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001489
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001490 /* handle common attributes first */
1491 if (strcmp(name, "tag") == 0) {
1492 res = self->tag;
1493 Py_INCREF(res);
1494 return res;
1495 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001496 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001497 Py_INCREF(res);
1498 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001499 }
1500
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001501 /* methods */
1502 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1503 if (res)
1504 return res;
1505
1506 /* less common attributes */
1507 if (strcmp(name, "tail") == 0) {
1508 PyErr_Clear();
1509 res = element_get_tail(self);
1510 } else if (strcmp(name, "attrib") == 0) {
1511 PyErr_Clear();
1512 if (!self->extra)
1513 element_new_extra(self, NULL);
1514 res = element_get_attrib(self);
1515 }
1516
1517 if (!res)
1518 return NULL;
1519
1520 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001521 return res;
1522}
1523
1524static int
1525element_setattr(ElementObject* self, const char* name, PyObject* value)
1526{
1527 if (value == NULL) {
1528 PyErr_SetString(
1529 PyExc_AttributeError,
1530 "can't delete element attributes"
1531 );
1532 return -1;
1533 }
1534
1535 if (strcmp(name, "tag") == 0) {
1536 Py_DECREF(self->tag);
1537 self->tag = value;
1538 Py_INCREF(self->tag);
1539 } else if (strcmp(name, "text") == 0) {
1540 Py_DECREF(JOIN_OBJ(self->text));
1541 self->text = value;
1542 Py_INCREF(self->text);
1543 } else if (strcmp(name, "tail") == 0) {
1544 Py_DECREF(JOIN_OBJ(self->tail));
1545 self->tail = value;
1546 Py_INCREF(self->tail);
1547 } else if (strcmp(name, "attrib") == 0) {
1548 if (!self->extra)
1549 element_new_extra(self, NULL);
1550 Py_DECREF(self->extra->attrib);
1551 self->extra->attrib = value;
1552 Py_INCREF(self->extra->attrib);
1553 } else {
1554 PyErr_SetString(PyExc_AttributeError, name);
1555 return -1;
1556 }
1557
1558 return 0;
1559}
1560
1561static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001562 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001563 0, /* sq_concat */
1564 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001565 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001566 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001567 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001568 0,
1569};
1570
1571static PyMappingMethods element_as_mapping = {
1572 (lenfunc) element_length,
1573 (binaryfunc) element_subscr,
1574 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001575};
1576
Neal Norwitz227b5332006-03-22 09:28:35 +00001577static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001578 PyVarObject_HEAD_INIT(NULL, 0)
1579 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001580 /* methods */
1581 (destructor)element_dealloc, /* tp_dealloc */
1582 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001583 0, /* tp_getattr */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001584 (setattrfunc)element_setattr, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00001585 0, /* tp_reserved */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001586 (reprfunc)element_repr, /* tp_repr */
1587 0, /* tp_as_number */
1588 &element_as_sequence, /* tp_as_sequence */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001589 &element_as_mapping, /* tp_as_mapping */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001590 0, /* tp_hash */
1591 0, /* tp_call */
1592 0, /* tp_str */
1593 (getattrofunc)element_getattro, /* tp_getattro */
1594 0, /* tp_setattro */
1595 0, /* tp_as_buffer */
1596 Py_TPFLAGS_DEFAULT, /* tp_flags */
1597 0, /* tp_doc */
1598 0, /* tp_traverse */
1599 0, /* tp_clear */
1600 0, /* tp_richcompare */
1601 0, /* tp_weaklistoffset */
1602 0, /* tp_iter */
1603 0, /* tp_iternext */
1604 element_methods, /* tp_methods */
1605 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001606};
1607
1608/* ==================================================================== */
1609/* the tree builder type */
1610
1611typedef struct {
1612 PyObject_HEAD
1613
1614 PyObject* root; /* root node (first created node) */
1615
1616 ElementObject* this; /* current node */
1617 ElementObject* last; /* most recently created node */
1618
1619 PyObject* data; /* data collector (string or list), or NULL */
1620
1621 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001622 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001623
1624 /* element tracing */
1625 PyObject* events; /* list of events, or NULL if not collecting */
1626 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1627 PyObject* end_event_obj;
1628 PyObject* start_ns_event_obj;
1629 PyObject* end_ns_event_obj;
1630
1631} TreeBuilderObject;
1632
Neal Norwitz227b5332006-03-22 09:28:35 +00001633static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001634
Christian Heimes90aa7642007-12-19 02:45:37 +00001635#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001636
1637/* -------------------------------------------------------------------- */
1638/* constructor and destructor */
1639
1640LOCAL(PyObject*)
1641treebuilder_new(void)
1642{
1643 TreeBuilderObject* self;
1644
1645 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1646 if (self == NULL)
1647 return NULL;
1648
1649 self->root = NULL;
1650
1651 Py_INCREF(Py_None);
1652 self->this = (ElementObject*) Py_None;
1653
1654 Py_INCREF(Py_None);
1655 self->last = (ElementObject*) Py_None;
1656
1657 self->data = NULL;
1658
1659 self->stack = PyList_New(20);
1660 self->index = 0;
1661
1662 self->events = NULL;
1663 self->start_event_obj = self->end_event_obj = NULL;
1664 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1665
1666 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1667
1668 return (PyObject*) self;
1669}
1670
1671static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001672treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001673{
1674 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1675 return NULL;
1676
1677 return treebuilder_new();
1678}
1679
1680static void
1681treebuilder_dealloc(TreeBuilderObject* self)
1682{
1683 Py_XDECREF(self->end_ns_event_obj);
1684 Py_XDECREF(self->start_ns_event_obj);
1685 Py_XDECREF(self->end_event_obj);
1686 Py_XDECREF(self->start_event_obj);
1687 Py_XDECREF(self->events);
1688 Py_DECREF(self->stack);
1689 Py_XDECREF(self->data);
1690 Py_DECREF(self->last);
1691 Py_DECREF(self->this);
1692 Py_XDECREF(self->root);
1693
1694 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1695
1696 PyObject_Del(self);
1697}
1698
1699/* -------------------------------------------------------------------- */
1700/* handlers */
1701
1702LOCAL(PyObject*)
1703treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1704 PyObject* standalone)
1705{
1706 Py_RETURN_NONE;
1707}
1708
1709LOCAL(PyObject*)
1710treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1711 PyObject* attrib)
1712{
1713 PyObject* node;
1714 PyObject* this;
1715
1716 if (self->data) {
1717 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001718 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001719 self->last->text = JOIN_SET(
1720 self->data, PyList_CheckExact(self->data)
1721 );
1722 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001723 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001724 self->last->tail = JOIN_SET(
1725 self->data, PyList_CheckExact(self->data)
1726 );
1727 }
1728 self->data = NULL;
1729 }
1730
1731 node = element_new(tag, attrib);
1732 if (!node)
1733 return NULL;
1734
1735 this = (PyObject*) self->this;
1736
1737 if (this != Py_None) {
1738 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001739 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001740 } else {
1741 if (self->root) {
1742 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001743 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001744 "multiple elements on top level"
1745 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001746 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001747 }
1748 Py_INCREF(node);
1749 self->root = node;
1750 }
1751
1752 if (self->index < PyList_GET_SIZE(self->stack)) {
1753 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001754 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001755 Py_INCREF(this);
1756 } else {
1757 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001758 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001759 }
1760 self->index++;
1761
1762 Py_DECREF(this);
1763 Py_INCREF(node);
1764 self->this = (ElementObject*) node;
1765
1766 Py_DECREF(self->last);
1767 Py_INCREF(node);
1768 self->last = (ElementObject*) node;
1769
1770 if (self->start_event_obj) {
1771 PyObject* res;
1772 PyObject* action = self->start_event_obj;
1773 res = PyTuple_New(2);
1774 if (res) {
1775 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1776 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1777 PyList_Append(self->events, res);
1778 Py_DECREF(res);
1779 } else
1780 PyErr_Clear(); /* FIXME: propagate error */
1781 }
1782
1783 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001784
1785 error:
1786 Py_DECREF(node);
1787 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001788}
1789
1790LOCAL(PyObject*)
1791treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1792{
1793 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001794 if (self->last == (ElementObject*) Py_None) {
1795 /* ignore calls to data before the first call to start */
1796 Py_RETURN_NONE;
1797 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001798 /* store the first item as is */
1799 Py_INCREF(data); self->data = data;
1800 } else {
1801 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001802 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1803 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001804 /* expat often generates single character data sections; handle
1805 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001806 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1807 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001808 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001809 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001810 } else if (PyList_CheckExact(self->data)) {
1811 if (PyList_Append(self->data, data) < 0)
1812 return NULL;
1813 } else {
1814 PyObject* list = PyList_New(2);
1815 if (!list)
1816 return NULL;
1817 PyList_SET_ITEM(list, 0, self->data);
1818 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1819 self->data = list;
1820 }
1821 }
1822
1823 Py_RETURN_NONE;
1824}
1825
1826LOCAL(PyObject*)
1827treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1828{
1829 PyObject* item;
1830
1831 if (self->data) {
1832 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001833 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001834 self->last->text = JOIN_SET(
1835 self->data, PyList_CheckExact(self->data)
1836 );
1837 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001838 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001839 self->last->tail = JOIN_SET(
1840 self->data, PyList_CheckExact(self->data)
1841 );
1842 }
1843 self->data = NULL;
1844 }
1845
1846 if (self->index == 0) {
1847 PyErr_SetString(
1848 PyExc_IndexError,
1849 "pop from empty stack"
1850 );
1851 return NULL;
1852 }
1853
1854 self->index--;
1855
1856 item = PyList_GET_ITEM(self->stack, self->index);
1857 Py_INCREF(item);
1858
1859 Py_DECREF(self->last);
1860
1861 self->last = (ElementObject*) self->this;
1862 self->this = (ElementObject*) item;
1863
1864 if (self->end_event_obj) {
1865 PyObject* res;
1866 PyObject* action = self->end_event_obj;
1867 PyObject* node = (PyObject*) self->last;
1868 res = PyTuple_New(2);
1869 if (res) {
1870 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1871 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1872 PyList_Append(self->events, res);
1873 Py_DECREF(res);
1874 } else
1875 PyErr_Clear(); /* FIXME: propagate error */
1876 }
1877
1878 Py_INCREF(self->last);
1879 return (PyObject*) self->last;
1880}
1881
1882LOCAL(void)
1883treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001884 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001885{
1886 PyObject* res;
1887 PyObject* action;
1888 PyObject* parcel;
1889
1890 if (!self->events)
1891 return;
1892
1893 if (start) {
1894 if (!self->start_ns_event_obj)
1895 return;
1896 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001897 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001898 if (!parcel)
1899 return;
1900 Py_INCREF(action);
1901 } else {
1902 if (!self->end_ns_event_obj)
1903 return;
1904 action = self->end_ns_event_obj;
1905 Py_INCREF(action);
1906 parcel = Py_None;
1907 Py_INCREF(parcel);
1908 }
1909
1910 res = PyTuple_New(2);
1911
1912 if (res) {
1913 PyTuple_SET_ITEM(res, 0, action);
1914 PyTuple_SET_ITEM(res, 1, parcel);
1915 PyList_Append(self->events, res);
1916 Py_DECREF(res);
1917 } else
1918 PyErr_Clear(); /* FIXME: propagate error */
1919}
1920
1921/* -------------------------------------------------------------------- */
1922/* methods (in alphabetical order) */
1923
1924static PyObject*
1925treebuilder_data(TreeBuilderObject* self, PyObject* args)
1926{
1927 PyObject* data;
1928 if (!PyArg_ParseTuple(args, "O:data", &data))
1929 return NULL;
1930
1931 return treebuilder_handle_data(self, data);
1932}
1933
1934static PyObject*
1935treebuilder_end(TreeBuilderObject* self, PyObject* args)
1936{
1937 PyObject* tag;
1938 if (!PyArg_ParseTuple(args, "O:end", &tag))
1939 return NULL;
1940
1941 return treebuilder_handle_end(self, tag);
1942}
1943
1944LOCAL(PyObject*)
1945treebuilder_done(TreeBuilderObject* self)
1946{
1947 PyObject* res;
1948
1949 /* FIXME: check stack size? */
1950
1951 if (self->root)
1952 res = self->root;
1953 else
1954 res = Py_None;
1955
1956 Py_INCREF(res);
1957 return res;
1958}
1959
1960static PyObject*
1961treebuilder_close(TreeBuilderObject* self, PyObject* args)
1962{
1963 if (!PyArg_ParseTuple(args, ":close"))
1964 return NULL;
1965
1966 return treebuilder_done(self);
1967}
1968
1969static PyObject*
1970treebuilder_start(TreeBuilderObject* self, PyObject* args)
1971{
1972 PyObject* tag;
1973 PyObject* attrib = Py_None;
1974 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1975 return NULL;
1976
1977 return treebuilder_handle_start(self, tag, attrib);
1978}
1979
1980static PyObject*
1981treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1982{
1983 PyObject* encoding;
1984 PyObject* standalone;
1985 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1986 return NULL;
1987
1988 return treebuilder_handle_xml(self, encoding, standalone);
1989}
1990
1991static PyMethodDef treebuilder_methods[] = {
1992 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1993 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1994 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1995 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1996 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1997 {NULL, NULL}
1998};
1999
Neal Norwitz227b5332006-03-22 09:28:35 +00002000static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002001 PyVarObject_HEAD_INIT(NULL, 0)
2002 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002003 /* methods */
2004 (destructor)treebuilder_dealloc, /* tp_dealloc */
2005 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002006 0, /* tp_getattr */
2007 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002008 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002009 0, /* tp_repr */
2010 0, /* tp_as_number */
2011 0, /* tp_as_sequence */
2012 0, /* tp_as_mapping */
2013 0, /* tp_hash */
2014 0, /* tp_call */
2015 0, /* tp_str */
2016 0, /* tp_getattro */
2017 0, /* tp_setattro */
2018 0, /* tp_as_buffer */
2019 Py_TPFLAGS_DEFAULT, /* tp_flags */
2020 0, /* tp_doc */
2021 0, /* tp_traverse */
2022 0, /* tp_clear */
2023 0, /* tp_richcompare */
2024 0, /* tp_weaklistoffset */
2025 0, /* tp_iter */
2026 0, /* tp_iternext */
2027 treebuilder_methods, /* tp_methods */
2028 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002029};
2030
2031/* ==================================================================== */
2032/* the expat interface */
2033
2034#if defined(USE_EXPAT)
2035
2036#include "expat.h"
2037
2038#if defined(USE_PYEXPAT_CAPI)
2039#include "pyexpat.h"
2040static struct PyExpat_CAPI* expat_capi;
2041#define EXPAT(func) (expat_capi->func)
2042#else
2043#define EXPAT(func) (XML_##func)
2044#endif
2045
2046typedef struct {
2047 PyObject_HEAD
2048
2049 XML_Parser parser;
2050
2051 PyObject* target;
2052 PyObject* entity;
2053
2054 PyObject* names;
2055
2056 PyObject* handle_xml;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002057
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002058 PyObject* handle_start;
2059 PyObject* handle_data;
2060 PyObject* handle_end;
2061
2062 PyObject* handle_comment;
2063 PyObject* handle_pi;
2064
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002065 PyObject* handle_close;
2066
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002067} XMLParserObject;
2068
Neal Norwitz227b5332006-03-22 09:28:35 +00002069static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002070
2071/* helpers */
2072
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002073LOCAL(PyObject*)
2074makeuniversal(XMLParserObject* self, const char* string)
2075{
2076 /* convert a UTF-8 tag/attribute name from the expat parser
2077 to a universal name string */
2078
2079 int size = strlen(string);
2080 PyObject* key;
2081 PyObject* value;
2082
2083 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002084 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002085 if (!key)
2086 return NULL;
2087
2088 value = PyDict_GetItem(self->names, key);
2089
2090 if (value) {
2091 Py_INCREF(value);
2092 } else {
2093 /* new name. convert to universal name, and decode as
2094 necessary */
2095
2096 PyObject* tag;
2097 char* p;
2098 int i;
2099
2100 /* look for namespace separator */
2101 for (i = 0; i < size; i++)
2102 if (string[i] == '}')
2103 break;
2104 if (i != size) {
2105 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002106 tag = PyBytes_FromStringAndSize(NULL, size+1);
2107 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002108 p[0] = '{';
2109 memcpy(p+1, string, size);
2110 size++;
2111 } else {
2112 /* plain name; use key as tag */
2113 Py_INCREF(key);
2114 tag = key;
2115 }
2116
2117 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002118 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002119 value = PyUnicode_DecodeUTF8(p, size, "strict");
2120 Py_DECREF(tag);
2121 if (!value) {
2122 Py_DECREF(key);
2123 return NULL;
2124 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002125
2126 /* add to names dictionary */
2127 if (PyDict_SetItem(self->names, key, value) < 0) {
2128 Py_DECREF(key);
2129 Py_DECREF(value);
2130 return NULL;
2131 }
2132 }
2133
2134 Py_DECREF(key);
2135 return value;
2136}
2137
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002138static void
2139expat_set_error(const char* message, int line, int column)
2140{
2141 PyObject *error;
2142 PyObject *position;
2143 char buffer[256];
2144
Alexander Belopolskye239d232010-12-08 23:31:48 +00002145 sprintf(buffer, "%.100s: line %d, column %d", message, line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002146
2147 error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
2148 if (!error)
2149 return;
2150
2151 /* add position attribute */
2152 position = Py_BuildValue("(ii)", line, column);
2153 if (!position) {
2154 Py_DECREF(error);
2155 return;
2156 }
2157 if (PyObject_SetAttrString(error, "position", position) == -1) {
2158 Py_DECREF(error);
2159 Py_DECREF(position);
2160 return;
2161 }
2162 Py_DECREF(position);
2163
2164 PyErr_SetObject(elementtree_parseerror_obj, error);
2165 Py_DECREF(error);
2166}
2167
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002168/* -------------------------------------------------------------------- */
2169/* handlers */
2170
2171static void
2172expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2173 int data_len)
2174{
2175 PyObject* key;
2176 PyObject* value;
2177 PyObject* res;
2178
2179 if (data_len < 2 || data_in[0] != '&')
2180 return;
2181
Neal Norwitz0269b912007-08-08 06:56:02 +00002182 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002183 if (!key)
2184 return;
2185
2186 value = PyDict_GetItem(self->entity, key);
2187
2188 if (value) {
2189 if (TreeBuilder_CheckExact(self->target))
2190 res = treebuilder_handle_data(
2191 (TreeBuilderObject*) self->target, value
2192 );
2193 else if (self->handle_data)
2194 res = PyObject_CallFunction(self->handle_data, "O", value);
2195 else
2196 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002197 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002198 } else if (!PyErr_Occurred()) {
2199 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002200 char message[128] = "undefined entity ";
2201 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002202 expat_set_error(
2203 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002204 EXPAT(GetErrorLineNumber)(self->parser),
2205 EXPAT(GetErrorColumnNumber)(self->parser)
2206 );
2207 }
2208
2209 Py_DECREF(key);
2210}
2211
2212static void
2213expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2214 const XML_Char **attrib_in)
2215{
2216 PyObject* res;
2217 PyObject* tag;
2218 PyObject* attrib;
2219 int ok;
2220
2221 /* tag name */
2222 tag = makeuniversal(self, tag_in);
2223 if (!tag)
2224 return; /* parser will look for errors */
2225
2226 /* attributes */
2227 if (attrib_in[0]) {
2228 attrib = PyDict_New();
2229 if (!attrib)
2230 return;
2231 while (attrib_in[0] && attrib_in[1]) {
2232 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002233 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002234 if (!key || !value) {
2235 Py_XDECREF(value);
2236 Py_XDECREF(key);
2237 Py_DECREF(attrib);
2238 return;
2239 }
2240 ok = PyDict_SetItem(attrib, key, value);
2241 Py_DECREF(value);
2242 Py_DECREF(key);
2243 if (ok < 0) {
2244 Py_DECREF(attrib);
2245 return;
2246 }
2247 attrib_in += 2;
2248 }
2249 } else {
2250 Py_INCREF(Py_None);
2251 attrib = Py_None;
2252 }
2253
2254 if (TreeBuilder_CheckExact(self->target))
2255 /* shortcut */
2256 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2257 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002258 else if (self->handle_start) {
2259 if (attrib == Py_None) {
2260 Py_DECREF(attrib);
2261 attrib = PyDict_New();
2262 if (!attrib)
2263 return;
2264 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002265 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002266 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002267 res = NULL;
2268
2269 Py_DECREF(tag);
2270 Py_DECREF(attrib);
2271
2272 Py_XDECREF(res);
2273}
2274
2275static void
2276expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2277 int data_len)
2278{
2279 PyObject* data;
2280 PyObject* res;
2281
Neal Norwitz0269b912007-08-08 06:56:02 +00002282 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002283 if (!data)
2284 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002285
2286 if (TreeBuilder_CheckExact(self->target))
2287 /* shortcut */
2288 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2289 else if (self->handle_data)
2290 res = PyObject_CallFunction(self->handle_data, "O", data);
2291 else
2292 res = NULL;
2293
2294 Py_DECREF(data);
2295
2296 Py_XDECREF(res);
2297}
2298
2299static void
2300expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2301{
2302 PyObject* tag;
2303 PyObject* res = NULL;
2304
2305 if (TreeBuilder_CheckExact(self->target))
2306 /* shortcut */
2307 /* the standard tree builder doesn't look at the end tag */
2308 res = treebuilder_handle_end(
2309 (TreeBuilderObject*) self->target, Py_None
2310 );
2311 else if (self->handle_end) {
2312 tag = makeuniversal(self, tag_in);
2313 if (tag) {
2314 res = PyObject_CallFunction(self->handle_end, "O", tag);
2315 Py_DECREF(tag);
2316 }
2317 }
2318
2319 Py_XDECREF(res);
2320}
2321
2322static void
2323expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2324 const XML_Char *uri)
2325{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002326 PyObject* sprefix = NULL;
2327 PyObject* suri = NULL;
2328
2329 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2330 if (!suri)
2331 return;
2332
2333 if (prefix)
2334 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2335 else
2336 sprefix = PyUnicode_FromString("");
2337 if (!sprefix) {
2338 Py_DECREF(suri);
2339 return;
2340 }
2341
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002342 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002343 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002344 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002345
2346 Py_DECREF(sprefix);
2347 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002348}
2349
2350static void
2351expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2352{
2353 treebuilder_handle_namespace(
2354 (TreeBuilderObject*) self->target, 0, NULL, NULL
2355 );
2356}
2357
2358static void
2359expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2360{
2361 PyObject* comment;
2362 PyObject* res;
2363
2364 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002365 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002366 if (comment) {
2367 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2368 Py_XDECREF(res);
2369 Py_DECREF(comment);
2370 }
2371 }
2372}
2373
2374static void
2375expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2376 const XML_Char* data_in)
2377{
2378 PyObject* target;
2379 PyObject* data;
2380 PyObject* res;
2381
2382 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002383 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2384 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002385 if (target && data) {
2386 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2387 Py_XDECREF(res);
2388 Py_DECREF(data);
2389 Py_DECREF(target);
2390 } else {
2391 Py_XDECREF(data);
2392 Py_XDECREF(target);
2393 }
2394 }
2395}
2396
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002397static int
2398expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2399 XML_Encoding *info)
2400{
2401 PyObject* u;
2402 Py_UNICODE* p;
2403 unsigned char s[256];
2404 int i;
2405
2406 memset(info, 0, sizeof(XML_Encoding));
2407
2408 for (i = 0; i < 256; i++)
2409 s[i] = i;
2410
Fredrik Lundhc3389992005-12-25 11:40:19 +00002411 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002412 if (!u)
2413 return XML_STATUS_ERROR;
2414
2415 if (PyUnicode_GET_SIZE(u) != 256) {
2416 Py_DECREF(u);
2417 return XML_STATUS_ERROR;
2418 }
2419
2420 p = PyUnicode_AS_UNICODE(u);
2421
2422 for (i = 0; i < 256; i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002423 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2424 info->map[i] = p[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002425 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002426 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002427 }
2428
2429 Py_DECREF(u);
2430
2431 return XML_STATUS_OK;
2432}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002433
2434/* -------------------------------------------------------------------- */
2435/* constructor and destructor */
2436
2437static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002438xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002439{
2440 XMLParserObject* self;
2441 /* FIXME: does this need to be static? */
2442 static XML_Memory_Handling_Suite memory_handler;
2443
2444 PyObject* target = NULL;
2445 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002446 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002447 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2448 &target, &encoding))
2449 return NULL;
2450
2451#if defined(USE_PYEXPAT_CAPI)
2452 if (!expat_capi) {
2453 PyErr_SetString(
2454 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2455 );
2456 return NULL;
2457 }
2458#endif
2459
2460 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2461 if (self == NULL)
2462 return NULL;
2463
2464 self->entity = PyDict_New();
2465 if (!self->entity) {
2466 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002467 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002468 }
2469
2470 self->names = PyDict_New();
2471 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002472 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002473 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002474 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002475 }
2476
2477 memory_handler.malloc_fcn = PyObject_Malloc;
2478 memory_handler.realloc_fcn = PyObject_Realloc;
2479 memory_handler.free_fcn = PyObject_Free;
2480
2481 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2482 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002483 PyObject_Del(self->names);
2484 PyObject_Del(self->entity);
2485 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002486 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002487 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002488 }
2489
2490 /* setup target handlers */
2491 if (!target) {
2492 target = treebuilder_new();
2493 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002494 EXPAT(ParserFree)(self->parser);
2495 PyObject_Del(self->names);
2496 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002497 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002498 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002499 }
2500 } else
2501 Py_INCREF(target);
2502 self->target = target;
2503
2504 self->handle_xml = PyObject_GetAttrString(target, "xml");
2505 self->handle_start = PyObject_GetAttrString(target, "start");
2506 self->handle_data = PyObject_GetAttrString(target, "data");
2507 self->handle_end = PyObject_GetAttrString(target, "end");
2508 self->handle_comment = PyObject_GetAttrString(target, "comment");
2509 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002510 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002511
2512 PyErr_Clear();
2513
2514 /* configure parser */
2515 EXPAT(SetUserData)(self->parser, self);
2516 EXPAT(SetElementHandler)(
2517 self->parser,
2518 (XML_StartElementHandler) expat_start_handler,
2519 (XML_EndElementHandler) expat_end_handler
2520 );
2521 EXPAT(SetDefaultHandlerExpand)(
2522 self->parser,
2523 (XML_DefaultHandler) expat_default_handler
2524 );
2525 EXPAT(SetCharacterDataHandler)(
2526 self->parser,
2527 (XML_CharacterDataHandler) expat_data_handler
2528 );
2529 if (self->handle_comment)
2530 EXPAT(SetCommentHandler)(
2531 self->parser,
2532 (XML_CommentHandler) expat_comment_handler
2533 );
2534 if (self->handle_pi)
2535 EXPAT(SetProcessingInstructionHandler)(
2536 self->parser,
2537 (XML_ProcessingInstructionHandler) expat_pi_handler
2538 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002539 EXPAT(SetUnknownEncodingHandler)(
2540 self->parser,
2541 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2542 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002543
2544 ALLOC(sizeof(XMLParserObject), "create expatparser");
2545
2546 return (PyObject*) self;
2547}
2548
2549static void
2550xmlparser_dealloc(XMLParserObject* self)
2551{
2552 EXPAT(ParserFree)(self->parser);
2553
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002554 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002555 Py_XDECREF(self->handle_pi);
2556 Py_XDECREF(self->handle_comment);
2557 Py_XDECREF(self->handle_end);
2558 Py_XDECREF(self->handle_data);
2559 Py_XDECREF(self->handle_start);
2560 Py_XDECREF(self->handle_xml);
2561
2562 Py_DECREF(self->target);
2563 Py_DECREF(self->entity);
2564 Py_DECREF(self->names);
2565
2566 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2567
2568 PyObject_Del(self);
2569}
2570
2571/* -------------------------------------------------------------------- */
2572/* methods (in alphabetical order) */
2573
2574LOCAL(PyObject*)
2575expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2576{
2577 int ok;
2578
2579 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2580
2581 if (PyErr_Occurred())
2582 return NULL;
2583
2584 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002585 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002586 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2587 EXPAT(GetErrorLineNumber)(self->parser),
2588 EXPAT(GetErrorColumnNumber)(self->parser)
2589 );
2590 return NULL;
2591 }
2592
2593 Py_RETURN_NONE;
2594}
2595
2596static PyObject*
2597xmlparser_close(XMLParserObject* self, PyObject* args)
2598{
2599 /* end feeding data to parser */
2600
2601 PyObject* res;
2602 if (!PyArg_ParseTuple(args, ":close"))
2603 return NULL;
2604
2605 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002606 if (!res)
2607 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002608
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002609 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610 Py_DECREF(res);
2611 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002612 } if (self->handle_close) {
2613 Py_DECREF(res);
2614 return PyObject_CallFunction(self->handle_close, "");
2615 } else
2616 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002617}
2618
2619static PyObject*
2620xmlparser_feed(XMLParserObject* self, PyObject* args)
2621{
2622 /* feed data to parser */
2623
2624 char* data;
2625 int data_len;
2626 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2627 return NULL;
2628
2629 return expat_parse(self, data, data_len, 0);
2630}
2631
2632static PyObject*
2633xmlparser_parse(XMLParserObject* self, PyObject* args)
2634{
2635 /* (internal) parse until end of input stream */
2636
2637 PyObject* reader;
2638 PyObject* buffer;
2639 PyObject* res;
2640
2641 PyObject* fileobj;
2642 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2643 return NULL;
2644
2645 reader = PyObject_GetAttrString(fileobj, "read");
2646 if (!reader)
2647 return NULL;
2648
2649 /* read from open file object */
2650 for (;;) {
2651
2652 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2653
2654 if (!buffer) {
2655 /* read failed (e.g. due to KeyboardInterrupt) */
2656 Py_DECREF(reader);
2657 return NULL;
2658 }
2659
Christian Heimes72b710a2008-05-26 13:28:38 +00002660 if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002661 Py_DECREF(buffer);
2662 break;
2663 }
2664
2665 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002666 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002667 );
2668
2669 Py_DECREF(buffer);
2670
2671 if (!res) {
2672 Py_DECREF(reader);
2673 return NULL;
2674 }
2675 Py_DECREF(res);
2676
2677 }
2678
2679 Py_DECREF(reader);
2680
2681 res = expat_parse(self, "", 0, 1);
2682
2683 if (res && TreeBuilder_CheckExact(self->target)) {
2684 Py_DECREF(res);
2685 return treebuilder_done((TreeBuilderObject*) self->target);
2686 }
2687
2688 return res;
2689}
2690
2691static PyObject*
2692xmlparser_setevents(XMLParserObject* self, PyObject* args)
2693{
2694 /* activate element event reporting */
2695
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002696 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002697 TreeBuilderObject* target;
2698
2699 PyObject* events; /* event collector */
2700 PyObject* event_set = Py_None;
2701 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2702 &event_set))
2703 return NULL;
2704
2705 if (!TreeBuilder_CheckExact(self->target)) {
2706 PyErr_SetString(
2707 PyExc_TypeError,
2708 "event handling only supported for cElementTree.Treebuilder "
2709 "targets"
2710 );
2711 return NULL;
2712 }
2713
2714 target = (TreeBuilderObject*) self->target;
2715
2716 Py_INCREF(events);
2717 Py_XDECREF(target->events);
2718 target->events = events;
2719
2720 /* clear out existing events */
2721 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2722 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2723 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2724 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2725
2726 if (event_set == Py_None) {
2727 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002728 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002729 Py_RETURN_NONE;
2730 }
2731
2732 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2733 goto error;
2734
2735 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2736 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2737 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002738 if (PyUnicode_Check(item)) {
2739 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002740 if (event == NULL)
2741 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002742 } else if (PyBytes_Check(item))
2743 event = PyBytes_AS_STRING(item);
2744 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002745 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002746 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002747 if (strcmp(event, "start") == 0) {
2748 Py_INCREF(item);
2749 target->start_event_obj = item;
2750 } else if (strcmp(event, "end") == 0) {
2751 Py_INCREF(item);
2752 Py_XDECREF(target->end_event_obj);
2753 target->end_event_obj = item;
2754 } else if (strcmp(event, "start-ns") == 0) {
2755 Py_INCREF(item);
2756 Py_XDECREF(target->start_ns_event_obj);
2757 target->start_ns_event_obj = item;
2758 EXPAT(SetNamespaceDeclHandler)(
2759 self->parser,
2760 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2761 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2762 );
2763 } else if (strcmp(event, "end-ns") == 0) {
2764 Py_INCREF(item);
2765 Py_XDECREF(target->end_ns_event_obj);
2766 target->end_ns_event_obj = item;
2767 EXPAT(SetNamespaceDeclHandler)(
2768 self->parser,
2769 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2770 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2771 );
2772 } else {
2773 PyErr_Format(
2774 PyExc_ValueError,
2775 "unknown event '%s'", event
2776 );
2777 return NULL;
2778 }
2779 }
2780
2781 Py_RETURN_NONE;
2782
2783 error:
2784 PyErr_SetString(
2785 PyExc_TypeError,
2786 "invalid event tuple"
2787 );
2788 return NULL;
2789}
2790
2791static PyMethodDef xmlparser_methods[] = {
2792 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2793 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2794 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2795 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2796 {NULL, NULL}
2797};
2798
2799static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002800xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002801{
Alexander Belopolskye239d232010-12-08 23:31:48 +00002802 if (PyUnicode_Check(nameobj)) {
2803 PyObject* res;
2804 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
2805 res = self->entity;
2806 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
2807 res = self->target;
2808 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
2809 return PyUnicode_FromFormat(
2810 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002811 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00002812 }
2813 else
2814 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002815
Alexander Belopolskye239d232010-12-08 23:31:48 +00002816 Py_INCREF(res);
2817 return res;
2818 }
2819 generic:
2820 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002821}
2822
Neal Norwitz227b5332006-03-22 09:28:35 +00002823static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002824 PyVarObject_HEAD_INIT(NULL, 0)
2825 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002826 /* methods */
2827 (destructor)xmlparser_dealloc, /* tp_dealloc */
2828 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002829 0, /* tp_getattr */
2830 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002831 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002832 0, /* tp_repr */
2833 0, /* tp_as_number */
2834 0, /* tp_as_sequence */
2835 0, /* tp_as_mapping */
2836 0, /* tp_hash */
2837 0, /* tp_call */
2838 0, /* tp_str */
2839 (getattrofunc)xmlparser_getattro, /* tp_getattro */
2840 0, /* tp_setattro */
2841 0, /* tp_as_buffer */
2842 Py_TPFLAGS_DEFAULT, /* tp_flags */
2843 0, /* tp_doc */
2844 0, /* tp_traverse */
2845 0, /* tp_clear */
2846 0, /* tp_richcompare */
2847 0, /* tp_weaklistoffset */
2848 0, /* tp_iter */
2849 0, /* tp_iternext */
2850 xmlparser_methods, /* tp_methods */
2851 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002852};
2853
2854#endif
2855
2856/* ==================================================================== */
2857/* python module interface */
2858
2859static PyMethodDef _functions[] = {
2860 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2861 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2862 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2863#if defined(USE_EXPAT)
2864 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2865 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2866#endif
2867 {NULL, NULL}
2868};
2869
Martin v. Löwis1a214512008-06-11 05:26:20 +00002870
2871static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002872 PyModuleDef_HEAD_INIT,
2873 "_elementtree",
2874 NULL,
2875 -1,
2876 _functions,
2877 NULL,
2878 NULL,
2879 NULL,
2880 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002881};
2882
Neal Norwitzf6657e62006-12-28 04:47:50 +00002883PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002884PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002885{
2886 PyObject* m;
2887 PyObject* g;
2888 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002889
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002890 /* Initialize object types */
2891 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002892 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002893 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002894 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002895#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002896 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002897 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002898#endif
2899
Martin v. Löwis1a214512008-06-11 05:26:20 +00002900 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002901 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002902 return NULL;
2903
2904 /* The code below requires that the module gets already added
2905 to sys.modules. */
2906 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002907 _elementtreemodule.m_name,
2908 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002909
2910 /* python glue code */
2911
2912 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002913 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002914 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002915
2916 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2917
2918 bootstrap = (
2919
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002920 "from copy import copy, deepcopy\n"
2921
2922 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002923 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002924 "except ImportError:\n"
2925 " import ElementTree\n"
2926 "ET = ElementTree\n"
2927 "del ElementTree\n"
2928
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002929 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002930
2931 "try:\n" /* check if copy works as is */
2932 " copy(cElementTree.Element('x'))\n"
2933 "except:\n"
2934 " def copyelement(elem):\n"
2935 " return elem\n"
2936
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002937 "class CommentProxy:\n"
2938 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002939 " element = cElementTree.Element(ET.Comment)\n"
2940 " element.text = text\n"
2941 " return element\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002942 " def __eq__(self, other):\n"
2943 " return ET.Comment == other\n"
2944 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002945
2946 "class ElementTree(ET.ElementTree):\n" /* public */
2947 " def parse(self, source, parser=None):\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00002948 " close_source = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002949 " if not hasattr(source, 'read'):\n"
2950 " source = open(source, 'rb')\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00002951 " close_source = True\n"
2952 " try:\n"
2953 " if parser is not None:\n"
2954 " while 1:\n"
2955 " data = source.read(65536)\n"
2956 " if not data:\n"
2957 " break\n"
2958 " parser.feed(data)\n"
2959 " self._root = parser.close()\n"
2960 " else:\n"
2961 " parser = cElementTree.XMLParser()\n"
2962 " self._root = parser._parse(source)\n"
2963 " return self._root\n"
2964 " finally:\n"
2965 " if close_source:\n"
2966 " source.close()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002967 "cElementTree.ElementTree = ElementTree\n"
2968
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002969 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970 " if tag == '*':\n"
2971 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002972 " if tag is None or node.tag == tag:\n"
2973 " yield node\n"
2974 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002975 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002977
2978 "def itertext(node):\n" /* helper */
2979 " if node.text:\n"
2980 " yield node.text\n"
2981 " for e in node:\n"
2982 " for s in e.itertext():\n"
2983 " yield s\n"
2984 " if e.tail:\n"
2985 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002986
2987 "def parse(source, parser=None):\n" /* public */
2988 " tree = ElementTree()\n"
2989 " tree.parse(source, parser)\n"
2990 " return tree\n"
2991 "cElementTree.parse = parse\n"
2992
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002993 "class iterparse:\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002994 " root = None\n"
2995 " def __init__(self, file, events=None):\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00002996 " self._close_file = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002997 " if not hasattr(file, 'read'):\n"
2998 " file = open(file, 'rb')\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00002999 " self._close_file = True\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003000 " self._file = file\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003001 " self._events = []\n"
3002 " self._index = 0\n"
Florent Xicluna91d51932011-11-01 23:31:09 +01003003 " self._error = None\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003004 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003005 " b = cElementTree.TreeBuilder()\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003006 " self._parser = cElementTree.XMLParser(b)\n"
3007 " self._parser._setevents(self._events, events)\n"
3008 " def __next__(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003009 " while 1:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003010 " try:\n"
3011 " item = self._events[self._index]\n"
Florent Xicluna91d51932011-11-01 23:31:09 +01003012 " self._index += 1\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003013 " return item\n"
Florent Xicluna91d51932011-11-01 23:31:09 +01003014 " except IndexError:\n"
3015 " pass\n"
3016 " if self._error:\n"
3017 " e = self._error\n"
3018 " self._error = None\n"
3019 " raise e\n"
3020 " if self._parser is None:\n"
3021 " self.root = self._root\n"
3022 " if self._close_file:\n"
3023 " self._file.close()\n"
3024 " raise StopIteration\n"
3025 " # load event buffer\n"
3026 " del self._events[:]\n"
3027 " self._index = 0\n"
3028 " data = self._file.read(16384)\n"
3029 " if data:\n"
3030 " try:\n"
3031 " self._parser.feed(data)\n"
3032 " except SyntaxError as exc:\n"
3033 " self._error = exc\n"
3034 " else:\n"
3035 " self._root = self._parser.close()\n"
3036 " self._parser = None\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003037 " def __iter__(self):\n"
3038 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003040
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003041 "class PIProxy:\n"
3042 " def __call__(self, target, text=None):\n"
3043 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003044 " element.text = target\n"
3045 " if text:\n"
3046 " element.text = element.text + ' ' + text\n"
3047 " return element\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003048 " def __eq__(self, other):\n"
3049 " return ET.PI == other\n"
3050 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003051
3052 "def XML(text):\n" /* public */
3053 " parser = cElementTree.XMLParser()\n"
3054 " parser.feed(text)\n"
3055 " return parser.close()\n"
3056 "cElementTree.XML = cElementTree.fromstring = XML\n"
3057
3058 "def XMLID(text):\n" /* public */
3059 " tree = XML(text)\n"
3060 " ids = {}\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003061 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003062 " id = elem.get('id')\n"
3063 " if id:\n"
3064 " ids[id] = elem\n"
3065 " return tree, ids\n"
3066 "cElementTree.XMLID = XMLID\n"
3067
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003068 "try:\n"
3069 " register_namespace = ET.register_namespace\n"
3070 "except AttributeError:\n"
3071 " def register_namespace(prefix, uri):\n"
3072 " ET._namespace_map[uri] = prefix\n"
3073 "cElementTree.register_namespace = register_namespace\n"
3074
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003075 "cElementTree.dump = ET.dump\n"
3076 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3077 "cElementTree.iselement = ET.iselement\n"
3078 "cElementTree.QName = ET.QName\n"
3079 "cElementTree.tostring = ET.tostring\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003080 "cElementTree.fromstringlist = ET.fromstringlist\n"
3081 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082 "cElementTree.VERSION = '" VERSION "'\n"
3083 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003084
3085 );
3086
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003087 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3088 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003089
3090 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3091
3092 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3093 if (elementtree_copyelement_obj) {
3094 /* reduce hack needed; enable reduce method */
3095 PyMethodDef* mp;
3096 for (mp = element_methods; mp->ml_name; mp++)
3097 if (mp->ml_meth == (PyCFunction) element_reduce) {
3098 mp->ml_name = "__reduce__";
3099 break;
3100 }
3101 } else
3102 PyErr_Clear();
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003103
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003104 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003105 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3106 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003107
3108#if defined(USE_PYEXPAT_CAPI)
3109 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003110 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3111 if (expat_capi) {
3112 /* check that it's usable */
3113 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3114 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3115 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3116 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3117 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3118 expat_capi = NULL;
3119 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003120#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003121
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003122 elementtree_parseerror_obj = PyErr_NewException(
3123 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3124 );
3125 Py_INCREF(elementtree_parseerror_obj);
3126 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3127
3128 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003129}