blob: 876ab3a769637cd775675681bc36aaea8884294e [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Leave defined to include the expat-based XMLParser type */
#define USE_EXPAT

/* Define to do all expat calls via pyexpat's embedded expat library */
/* #define USE_PYEXPAT_CAPI */

/* Number of child slots stored inline in the element's extra block;
   an element can hold this many children without extra memory
   allocations. */
#define STATIC_CHILDREN 4
66
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
73 eight bytes. For the current version of cElementTree, this means
74 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
/* Optional allocation tracing.  With the "#if 0" branch enabled, ALLOC
   and RELEASE keep a running byte count in "memory" and print it together
   with the given comment; in the default branch both macros expand to
   nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
89
/* compiler tweaks: LOCAL(type) introduces a file-private function
   returning "type"; under MSVC it additionally asks for inlining and
   the __fastcall calling convention */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
96
/* compatibility macros, keyed on the Python version being compiled
   against */

/* Py_REFCNT/Py_TYPE accessors, supplied when PY_VERSION_HEX is below 2.6 */
#if (PY_VERSION_HEX < 0x02060000)
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Py_ssize_t and lenfunc stand-ins, supplied below 2.5 */
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

/* below 2.4: fall back to the non-exact dict check, and provide
   Py_RETURN_NONE unless it is already defined */
#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check

#if !defined(Py_RETURN_NONE)
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
#endif
115
/* macros used to store 'join' flags in string object pointers.  the
   flag lives in the lowest bit of the pointer value, so all use of text
   and tail as object pointers must be wrapped in JOIN_OBJ.  see comments
   in the ElementObject definition for more info. */

/* strip the flag bit and return the real object pointer */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t) 1))

/* return the flag bit (0 or 1) */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)

/* return the object pointer of p combined with the given flag bit */
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
123
124/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000125static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000128static PyObject* elementtree_iter_obj;
129static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130static PyObject* elementpath_obj;
131
132/* helpers */
133
134LOCAL(PyObject*)
135deepcopy(PyObject* object, PyObject* memo)
136{
137 /* do a deep copy of the given object */
138
139 PyObject* args;
140 PyObject* result;
141
142 if (!elementtree_deepcopy_obj) {
143 PyErr_SetString(
144 PyExc_RuntimeError,
145 "deepcopy helper not found"
146 );
147 return NULL;
148 }
149
150 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000151 if (!args)
152 return NULL;
153
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
155 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156
157 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158
159 Py_DECREF(args);
160
161 return result;
162}
163
164LOCAL(PyObject*)
165list_join(PyObject* list)
166{
167 /* join list elements (destroying the list in the process) */
168
169 PyObject* joiner;
170 PyObject* function;
171 PyObject* args;
172 PyObject* result;
173
174 switch (PyList_GET_SIZE(list)) {
175 case 0:
176 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000177 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000178 case 1:
179 result = PyList_GET_ITEM(list, 0);
180 Py_INCREF(result);
181 Py_DECREF(list);
182 return result;
183 }
184
185 /* two or more elements: slice out a suitable separator from the
186 first member, and use that to join the entire list */
187
188 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
189 if (!joiner)
190 return NULL;
191
192 function = PyObject_GetAttrString(joiner, "join");
193 if (!function) {
194 Py_DECREF(joiner);
195 return NULL;
196 }
197
198 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000199 if (!args)
200 return NULL;
201
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202 PyTuple_SET_ITEM(args, 0, list);
203
204 result = PyObject_CallObject(function, args);
205
206 Py_DECREF(args); /* also removes list */
207 Py_DECREF(function);
208 Py_DECREF(joiner);
209
210 return result;
211}
212
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000213/* -------------------------------------------------------------------- */
214/* the element type */
215
216typedef struct {
217
218 /* attributes (a dictionary object), or None if no attributes */
219 PyObject* attrib;
220
221 /* child elements */
222 int length; /* actual number of items */
223 int allocated; /* allocated items */
224
225 /* this either points to _children or to a malloced buffer */
226 PyObject* *children;
227
228 PyObject* _children[STATIC_CHILDREN];
229
230} ElementObjectExtra;
231
232typedef struct {
233 PyObject_HEAD
234
235 /* element tag (a string). */
236 PyObject* tag;
237
238 /* text before first child. note that this is a tagged pointer;
239 use JOIN_OBJ to get the object pointer. the join flag is used
240 to distinguish lists created by the tree builder from lists
241 assigned to the attribute by application code; the former
242 should be joined before being returned to the user, the latter
243 should be left intact. */
244 PyObject* text;
245
246 /* text after this element, in parent. note that this is a tagged
247 pointer; use JOIN_OBJ to get the object pointer. */
248 PyObject* tail;
249
250 ElementObjectExtra* extra;
251
252} ElementObject;
253
Neal Norwitz227b5332006-03-22 09:28:35 +0000254static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000255
Christian Heimes90aa7642007-12-19 02:45:37 +0000256#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000257
258/* -------------------------------------------------------------------- */
259/* element constructor and destructor */
260
261LOCAL(int)
262element_new_extra(ElementObject* self, PyObject* attrib)
263{
264 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
265 if (!self->extra)
266 return -1;
267
268 if (!attrib)
269 attrib = Py_None;
270
271 Py_INCREF(attrib);
272 self->extra->attrib = attrib;
273
274 self->extra->length = 0;
275 self->extra->allocated = STATIC_CHILDREN;
276 self->extra->children = self->extra->_children;
277
278 return 0;
279}
280
281LOCAL(void)
282element_dealloc_extra(ElementObject* self)
283{
284 int i;
285
286 Py_DECREF(self->extra->attrib);
287
288 for (i = 0; i < self->extra->length; i++)
289 Py_DECREF(self->extra->children[i]);
290
291 if (self->extra->children != self->extra->_children)
292 PyObject_Free(self->extra->children);
293
294 PyObject_Free(self->extra);
295}
296
297LOCAL(PyObject*)
298element_new(PyObject* tag, PyObject* attrib)
299{
300 ElementObject* self;
301
302 self = PyObject_New(ElementObject, &Element_Type);
303 if (self == NULL)
304 return NULL;
305
306 /* use None for empty dictionaries */
307 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
308 attrib = Py_None;
309
310 self->extra = NULL;
311
312 if (attrib != Py_None) {
313
Thomas Wouters477c8d52006-05-27 19:21:47 +0000314 if (element_new_extra(self, attrib) < 0) {
315 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000316 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000317 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000318
319 self->extra->length = 0;
320 self->extra->allocated = STATIC_CHILDREN;
321 self->extra->children = self->extra->_children;
322
323 }
324
325 Py_INCREF(tag);
326 self->tag = tag;
327
328 Py_INCREF(Py_None);
329 self->text = Py_None;
330
331 Py_INCREF(Py_None);
332 self->tail = Py_None;
333
334 ALLOC(sizeof(ElementObject), "create element");
335
336 return (PyObject*) self;
337}
338
339LOCAL(int)
340element_resize(ElementObject* self, int extra)
341{
342 int size;
343 PyObject* *children;
344
345 /* make sure self->children can hold the given number of extra
346 elements. set an exception and return -1 if allocation failed */
347
348 if (!self->extra)
349 element_new_extra(self, NULL);
350
351 size = self->extra->length + extra;
352
353 if (size > self->extra->allocated) {
354 /* use Python 2.4's list growth strategy */
355 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000356 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
357 * which needs at least 4 bytes.
358 * Although it's a false alarm always assume at least one child to
359 * be safe.
360 */
361 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000362 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000363 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
364 * "children", which needs at least 4 bytes. Although it's a
365 * false alarm always assume at least one child to be safe.
366 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000367 children = PyObject_Realloc(self->extra->children,
368 size * sizeof(PyObject*));
369 if (!children)
370 goto nomemory;
371 } else {
372 children = PyObject_Malloc(size * sizeof(PyObject*));
373 if (!children)
374 goto nomemory;
375 /* copy existing children from static area to malloc buffer */
376 memcpy(children, self->extra->children,
377 self->extra->length * sizeof(PyObject*));
378 }
379 self->extra->children = children;
380 self->extra->allocated = size;
381 }
382
383 return 0;
384
385 nomemory:
386 PyErr_NoMemory();
387 return -1;
388}
389
390LOCAL(int)
391element_add_subelement(ElementObject* self, PyObject* element)
392{
393 /* add a child element to a parent */
394
395 if (element_resize(self, 1) < 0)
396 return -1;
397
398 Py_INCREF(element);
399 self->extra->children[self->extra->length] = element;
400
401 self->extra->length++;
402
403 return 0;
404}
405
406LOCAL(PyObject*)
407element_get_attrib(ElementObject* self)
408{
409 /* return borrowed reference to attrib dictionary */
410 /* note: this function assumes that the extra section exists */
411
412 PyObject* res = self->extra->attrib;
413
414 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000415 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000416 /* create missing dictionary */
417 res = PyDict_New();
418 if (!res)
419 return NULL;
420 self->extra->attrib = res;
421 }
422
423 return res;
424}
425
426LOCAL(PyObject*)
427element_get_text(ElementObject* self)
428{
429 /* return borrowed reference to text attribute */
430
431 PyObject* res = self->text;
432
433 if (JOIN_GET(res)) {
434 res = JOIN_OBJ(res);
435 if (PyList_CheckExact(res)) {
436 res = list_join(res);
437 if (!res)
438 return NULL;
439 self->text = res;
440 }
441 }
442
443 return res;
444}
445
446LOCAL(PyObject*)
447element_get_tail(ElementObject* self)
448{
449 /* return borrowed reference to text attribute */
450
451 PyObject* res = self->tail;
452
453 if (JOIN_GET(res)) {
454 res = JOIN_OBJ(res);
455 if (PyList_CheckExact(res)) {
456 res = list_join(res);
457 if (!res)
458 return NULL;
459 self->tail = res;
460 }
461 }
462
463 return res;
464}
465
466static PyObject*
467element(PyObject* self, PyObject* args, PyObject* kw)
468{
469 PyObject* elem;
470
471 PyObject* tag;
472 PyObject* attrib = NULL;
473 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
474 &PyDict_Type, &attrib))
475 return NULL;
476
477 if (attrib || kw) {
478 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
479 if (!attrib)
480 return NULL;
481 if (kw)
482 PyDict_Update(attrib, kw);
483 } else {
484 Py_INCREF(Py_None);
485 attrib = Py_None;
486 }
487
488 elem = element_new(tag, attrib);
489
490 Py_DECREF(attrib);
491
492 return elem;
493}
494
495static PyObject*
496subelement(PyObject* self, PyObject* args, PyObject* kw)
497{
498 PyObject* elem;
499
500 ElementObject* parent;
501 PyObject* tag;
502 PyObject* attrib = NULL;
503 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
504 &Element_Type, &parent, &tag,
505 &PyDict_Type, &attrib))
506 return NULL;
507
508 if (attrib || kw) {
509 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
510 if (!attrib)
511 return NULL;
512 if (kw)
513 PyDict_Update(attrib, kw);
514 } else {
515 Py_INCREF(Py_None);
516 attrib = Py_None;
517 }
518
519 elem = element_new(tag, attrib);
520
521 Py_DECREF(attrib);
522
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000523 if (element_add_subelement(parent, elem) < 0) {
524 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000525 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000526 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000527
528 return elem;
529}
530
531static void
532element_dealloc(ElementObject* self)
533{
534 if (self->extra)
535 element_dealloc_extra(self);
536
537 /* discard attributes */
538 Py_DECREF(self->tag);
539 Py_DECREF(JOIN_OBJ(self->text));
540 Py_DECREF(JOIN_OBJ(self->tail));
541
542 RELEASE(sizeof(ElementObject), "destroy element");
543
544 PyObject_Del(self);
545}
546
547/* -------------------------------------------------------------------- */
548/* methods (in alphabetical order) */
549
550static PyObject*
551element_append(ElementObject* self, PyObject* args)
552{
553 PyObject* element;
554 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
555 return NULL;
556
557 if (element_add_subelement(self, element) < 0)
558 return NULL;
559
560 Py_RETURN_NONE;
561}
562
563static PyObject*
564element_clear(ElementObject* self, PyObject* args)
565{
566 if (!PyArg_ParseTuple(args, ":clear"))
567 return NULL;
568
569 if (self->extra) {
570 element_dealloc_extra(self);
571 self->extra = NULL;
572 }
573
574 Py_INCREF(Py_None);
575 Py_DECREF(JOIN_OBJ(self->text));
576 self->text = Py_None;
577
578 Py_INCREF(Py_None);
579 Py_DECREF(JOIN_OBJ(self->tail));
580 self->tail = Py_None;
581
582 Py_RETURN_NONE;
583}
584
585static PyObject*
586element_copy(ElementObject* self, PyObject* args)
587{
588 int i;
589 ElementObject* element;
590
591 if (!PyArg_ParseTuple(args, ":__copy__"))
592 return NULL;
593
594 element = (ElementObject*) element_new(
595 self->tag, (self->extra) ? self->extra->attrib : Py_None
596 );
597 if (!element)
598 return NULL;
599
600 Py_DECREF(JOIN_OBJ(element->text));
601 element->text = self->text;
602 Py_INCREF(JOIN_OBJ(element->text));
603
604 Py_DECREF(JOIN_OBJ(element->tail));
605 element->tail = self->tail;
606 Py_INCREF(JOIN_OBJ(element->tail));
607
608 if (self->extra) {
609
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000610 if (element_resize(element, self->extra->length) < 0) {
611 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000612 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000613 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614
615 for (i = 0; i < self->extra->length; i++) {
616 Py_INCREF(self->extra->children[i]);
617 element->extra->children[i] = self->extra->children[i];
618 }
619
620 element->extra->length = self->extra->length;
621
622 }
623
624 return (PyObject*) element;
625}
626
627static PyObject*
628element_deepcopy(ElementObject* self, PyObject* args)
629{
630 int i;
631 ElementObject* element;
632 PyObject* tag;
633 PyObject* attrib;
634 PyObject* text;
635 PyObject* tail;
636 PyObject* id;
637
638 PyObject* memo;
639 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
640 return NULL;
641
642 tag = deepcopy(self->tag, memo);
643 if (!tag)
644 return NULL;
645
646 if (self->extra) {
647 attrib = deepcopy(self->extra->attrib, memo);
648 if (!attrib) {
649 Py_DECREF(tag);
650 return NULL;
651 }
652 } else {
653 Py_INCREF(Py_None);
654 attrib = Py_None;
655 }
656
657 element = (ElementObject*) element_new(tag, attrib);
658
659 Py_DECREF(tag);
660 Py_DECREF(attrib);
661
662 if (!element)
663 return NULL;
664
665 text = deepcopy(JOIN_OBJ(self->text), memo);
666 if (!text)
667 goto error;
668 Py_DECREF(element->text);
669 element->text = JOIN_SET(text, JOIN_GET(self->text));
670
671 tail = deepcopy(JOIN_OBJ(self->tail), memo);
672 if (!tail)
673 goto error;
674 Py_DECREF(element->tail);
675 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
676
677 if (self->extra) {
678
679 if (element_resize(element, self->extra->length) < 0)
680 goto error;
681
682 for (i = 0; i < self->extra->length; i++) {
683 PyObject* child = deepcopy(self->extra->children[i], memo);
684 if (!child) {
685 element->extra->length = i;
686 goto error;
687 }
688 element->extra->children[i] = child;
689 }
690
691 element->extra->length = self->extra->length;
692
693 }
694
695 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000696 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000697 if (!id)
698 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699
700 i = PyDict_SetItem(memo, id, (PyObject*) element);
701
702 Py_DECREF(id);
703
704 if (i < 0)
705 goto error;
706
707 return (PyObject*) element;
708
709 error:
710 Py_DECREF(element);
711 return NULL;
712}
713
714LOCAL(int)
715checkpath(PyObject* tag)
716{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000717 Py_ssize_t i;
718 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 /* check if a tag contains an xpath character */
721
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000722#define PATHCHAR(ch) \
723 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000724
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725 if (PyUnicode_Check(tag)) {
726 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
727 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
728 if (p[i] == '{')
729 check = 0;
730 else if (p[i] == '}')
731 check = 1;
732 else if (check && PATHCHAR(p[i]))
733 return 1;
734 }
735 return 0;
736 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000737 if (PyBytes_Check(tag)) {
738 char *p = PyBytes_AS_STRING(tag);
739 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000740 if (p[i] == '{')
741 check = 0;
742 else if (p[i] == '}')
743 check = 1;
744 else if (check && PATHCHAR(p[i]))
745 return 1;
746 }
747 return 0;
748 }
749
750 return 1; /* unknown type; might be path expression */
751}
752
753static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000754element_extend(ElementObject* self, PyObject* args)
755{
756 PyObject* seq;
757 Py_ssize_t i, seqlen = 0;
758
759 PyObject* seq_in;
760 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
761 return NULL;
762
763 seq = PySequence_Fast(seq_in, "");
764 if (!seq) {
765 PyErr_Format(
766 PyExc_TypeError,
767 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
768 );
769 return NULL;
770 }
771
772 seqlen = PySequence_Size(seq);
773 for (i = 0; i < seqlen; i++) {
774 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
775 if (element_add_subelement(self, element) < 0) {
776 Py_DECREF(seq);
777 return NULL;
778 }
779 }
780
781 Py_DECREF(seq);
782
783 Py_RETURN_NONE;
784}
785
786static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000787element_find(ElementObject* self, PyObject* args)
788{
789 int i;
790
791 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000792 PyObject* namespaces = Py_None;
793 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000794 return NULL;
795
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000796 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000797 return PyObject_CallMethod(
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000798 elementpath_obj, "find", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000799 );
800
801 if (!self->extra)
802 Py_RETURN_NONE;
803
804 for (i = 0; i < self->extra->length; i++) {
805 PyObject* item = self->extra->children[i];
806 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000807 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000808 Py_INCREF(item);
809 return item;
810 }
811 }
812
813 Py_RETURN_NONE;
814}
815
816static PyObject*
817element_findtext(ElementObject* self, PyObject* args)
818{
819 int i;
820
821 PyObject* tag;
822 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000823 PyObject* namespaces = Py_None;
824 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000825 return NULL;
826
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000827 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000828 return PyObject_CallMethod(
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000829 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000830 );
831
832 if (!self->extra) {
833 Py_INCREF(default_value);
834 return default_value;
835 }
836
837 for (i = 0; i < self->extra->length; i++) {
838 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000839 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
840
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000841 PyObject* text = element_get_text(item);
842 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000843 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000844 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000845 return text;
846 }
847 }
848
849 Py_INCREF(default_value);
850 return default_value;
851}
852
853static PyObject*
854element_findall(ElementObject* self, PyObject* args)
855{
856 int i;
857 PyObject* out;
858
859 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000860 PyObject* namespaces = Py_None;
861 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000862 return NULL;
863
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000864 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000865 return PyObject_CallMethod(
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000866 elementpath_obj, "findall", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000867 );
868
869 out = PyList_New(0);
870 if (!out)
871 return NULL;
872
873 if (!self->extra)
874 return out;
875
876 for (i = 0; i < self->extra->length; i++) {
877 PyObject* item = self->extra->children[i];
878 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000879 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000880 if (PyList_Append(out, item) < 0) {
881 Py_DECREF(out);
882 return NULL;
883 }
884 }
885 }
886
887 return out;
888}
889
890static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000891element_iterfind(ElementObject* self, PyObject* args)
892{
893 PyObject* tag;
894 PyObject* namespaces = Py_None;
895 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
896 return NULL;
897
898 return PyObject_CallMethod(
899 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
900 );
901}
902
903static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000904element_get(ElementObject* self, PyObject* args)
905{
906 PyObject* value;
907
908 PyObject* key;
909 PyObject* default_value = Py_None;
910 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
911 return NULL;
912
913 if (!self->extra || self->extra->attrib == Py_None)
914 value = default_value;
915 else {
916 value = PyDict_GetItem(self->extra->attrib, key);
917 if (!value)
918 value = default_value;
919 }
920
921 Py_INCREF(value);
922 return value;
923}
924
925static PyObject*
926element_getchildren(ElementObject* self, PyObject* args)
927{
928 int i;
929 PyObject* list;
930
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000931 /* FIXME: report as deprecated? */
932
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000933 if (!PyArg_ParseTuple(args, ":getchildren"))
934 return NULL;
935
936 if (!self->extra)
937 return PyList_New(0);
938
939 list = PyList_New(self->extra->length);
940 if (!list)
941 return NULL;
942
943 for (i = 0; i < self->extra->length; i++) {
944 PyObject* item = self->extra->children[i];
945 Py_INCREF(item);
946 PyList_SET_ITEM(list, i, item);
947 }
948
949 return list;
950}
951
952static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000953element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000954{
955 PyObject* result;
956
957 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000958 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000959 return NULL;
960
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000961 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000962 PyErr_SetString(
963 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000964 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000965 );
966 return NULL;
967 }
968
969 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000970 if (!args)
971 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000972
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000973 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
974 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
975
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000976 result = PyObject_CallObject(elementtree_iter_obj, args);
977
978 Py_DECREF(args);
979
980 return result;
981}
982
983
984static PyObject*
985element_itertext(ElementObject* self, PyObject* args)
986{
987 PyObject* result;
988
989 if (!PyArg_ParseTuple(args, ":itertext"))
990 return NULL;
991
992 if (!elementtree_itertext_obj) {
993 PyErr_SetString(
994 PyExc_RuntimeError,
995 "itertext helper not found"
996 );
997 return NULL;
998 }
999
1000 args = PyTuple_New(1);
1001 if (!args)
1002 return NULL;
1003
1004 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1005
1006 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001007
1008 Py_DECREF(args);
1009
1010 return result;
1011}
1012
1013static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001014element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001015{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001016 ElementObject* self = (ElementObject*) self_;
1017
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001018 if (!self->extra || index < 0 || index >= self->extra->length) {
1019 PyErr_SetString(
1020 PyExc_IndexError,
1021 "child index out of range"
1022 );
1023 return NULL;
1024 }
1025
1026 Py_INCREF(self->extra->children[index]);
1027 return self->extra->children[index];
1028}
1029
1030static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001031element_insert(ElementObject* self, PyObject* args)
1032{
1033 int i;
1034
1035 int index;
1036 PyObject* element;
1037 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1038 &Element_Type, &element))
1039 return NULL;
1040
1041 if (!self->extra)
1042 element_new_extra(self, NULL);
1043
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001044 if (index < 0) {
1045 index += self->extra->length;
1046 if (index < 0)
1047 index = 0;
1048 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001049 if (index > self->extra->length)
1050 index = self->extra->length;
1051
1052 if (element_resize(self, 1) < 0)
1053 return NULL;
1054
1055 for (i = self->extra->length; i > index; i--)
1056 self->extra->children[i] = self->extra->children[i-1];
1057
1058 Py_INCREF(element);
1059 self->extra->children[index] = element;
1060
1061 self->extra->length++;
1062
1063 Py_RETURN_NONE;
1064}
1065
1066static PyObject*
1067element_items(ElementObject* self, PyObject* args)
1068{
1069 if (!PyArg_ParseTuple(args, ":items"))
1070 return NULL;
1071
1072 if (!self->extra || self->extra->attrib == Py_None)
1073 return PyList_New(0);
1074
1075 return PyDict_Items(self->extra->attrib);
1076}
1077
1078static PyObject*
1079element_keys(ElementObject* self, PyObject* args)
1080{
1081 if (!PyArg_ParseTuple(args, ":keys"))
1082 return NULL;
1083
1084 if (!self->extra || self->extra->attrib == Py_None)
1085 return PyList_New(0);
1086
1087 return PyDict_Keys(self->extra->attrib);
1088}
1089
Martin v. Löwis18e16552006-02-15 17:27:45 +00001090static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001091element_length(ElementObject* self)
1092{
1093 if (!self->extra)
1094 return 0;
1095
1096 return self->extra->length;
1097}
1098
1099static PyObject*
1100element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1101{
1102 PyObject* elem;
1103
1104 PyObject* tag;
1105 PyObject* attrib;
1106 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1107 return NULL;
1108
1109 attrib = PyDict_Copy(attrib);
1110 if (!attrib)
1111 return NULL;
1112
1113 elem = element_new(tag, attrib);
1114
1115 Py_DECREF(attrib);
1116
1117 return elem;
1118}
1119
1120static PyObject*
1121element_reduce(ElementObject* self, PyObject* args)
1122{
1123 if (!PyArg_ParseTuple(args, ":__reduce__"))
1124 return NULL;
1125
1126 /* Hack alert: This method is used to work around a __copy__
1127 problem on certain 2.3 and 2.4 versions. To save time and
1128 simplify the code, we create the copy in here, and use a dummy
1129 copyelement helper to trick the copy module into doing the
1130 right thing. */
1131
1132 if (!elementtree_copyelement_obj) {
1133 PyErr_SetString(
1134 PyExc_RuntimeError,
1135 "copyelement helper not found"
1136 );
1137 return NULL;
1138 }
1139
1140 return Py_BuildValue(
1141 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1142 );
1143}
1144
1145static PyObject*
1146element_remove(ElementObject* self, PyObject* args)
1147{
1148 int i;
1149
1150 PyObject* element;
1151 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1152 return NULL;
1153
1154 if (!self->extra) {
1155 /* element has no children, so raise exception */
1156 PyErr_SetString(
1157 PyExc_ValueError,
1158 "list.remove(x): x not in list"
1159 );
1160 return NULL;
1161 }
1162
1163 for (i = 0; i < self->extra->length; i++) {
1164 if (self->extra->children[i] == element)
1165 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001166 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001167 break;
1168 }
1169
1170 if (i == self->extra->length) {
1171 /* element is not in children, so raise exception */
1172 PyErr_SetString(
1173 PyExc_ValueError,
1174 "list.remove(x): x not in list"
1175 );
1176 return NULL;
1177 }
1178
1179 Py_DECREF(self->extra->children[i]);
1180
1181 self->extra->length--;
1182
1183 for (; i < self->extra->length; i++)
1184 self->extra->children[i] = self->extra->children[i+1];
1185
1186 Py_RETURN_NONE;
1187}
1188
1189static PyObject*
1190element_repr(ElementObject* self)
1191{
Walter Dörwald7569dfe2007-05-19 21:49:49 +00001192 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001193}
1194
1195static PyObject*
1196element_set(ElementObject* self, PyObject* args)
1197{
1198 PyObject* attrib;
1199
1200 PyObject* key;
1201 PyObject* value;
1202 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1203 return NULL;
1204
1205 if (!self->extra)
1206 element_new_extra(self, NULL);
1207
1208 attrib = element_get_attrib(self);
1209 if (!attrib)
1210 return NULL;
1211
1212 if (PyDict_SetItem(attrib, key, value) < 0)
1213 return NULL;
1214
1215 Py_RETURN_NONE;
1216}
1217
1218static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001219element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001220{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001221 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001222 int i;
1223 PyObject* old;
1224
1225 if (!self->extra || index < 0 || index >= self->extra->length) {
1226 PyErr_SetString(
1227 PyExc_IndexError,
1228 "child assignment index out of range");
1229 return -1;
1230 }
1231
1232 old = self->extra->children[index];
1233
1234 if (item) {
1235 Py_INCREF(item);
1236 self->extra->children[index] = item;
1237 } else {
1238 self->extra->length--;
1239 for (i = index; i < self->extra->length; i++)
1240 self->extra->children[i] = self->extra->children[i+1];
1241 }
1242
1243 Py_DECREF(old);
1244
1245 return 0;
1246}
1247
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001248static PyObject*
1249element_subscr(PyObject* self_, PyObject* item)
1250{
1251 ElementObject* self = (ElementObject*) self_;
1252
1253#if (PY_VERSION_HEX < 0x02050000)
1254 if (PyInt_Check(item) || PyLong_Check(item)) {
1255 long i = PyInt_AsLong(item);
1256#else
1257 if (PyIndex_Check(item)) {
1258 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1259#endif
1260
1261 if (i == -1 && PyErr_Occurred()) {
1262 return NULL;
1263 }
1264 if (i < 0 && self->extra)
1265 i += self->extra->length;
1266 return element_getitem(self_, i);
1267 }
1268 else if (PySlice_Check(item)) {
1269 Py_ssize_t start, stop, step, slicelen, cur, i;
1270 PyObject* list;
1271
1272 if (!self->extra)
1273 return PyList_New(0);
1274
1275 if (PySlice_GetIndicesEx((PySliceObject *)item,
1276 self->extra->length,
1277 &start, &stop, &step, &slicelen) < 0) {
1278 return NULL;
1279 }
1280
1281 if (slicelen <= 0)
1282 return PyList_New(0);
1283 else {
1284 list = PyList_New(slicelen);
1285 if (!list)
1286 return NULL;
1287
1288 for (cur = start, i = 0; i < slicelen;
1289 cur += step, i++) {
1290 PyObject* item = self->extra->children[cur];
1291 Py_INCREF(item);
1292 PyList_SET_ITEM(list, i, item);
1293 }
1294
1295 return list;
1296 }
1297 }
1298 else {
1299 PyErr_SetString(PyExc_TypeError,
1300 "element indices must be integers");
1301 return NULL;
1302 }
1303}
1304
1305static int
1306element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1307{
1308 ElementObject* self = (ElementObject*) self_;
1309
1310#if (PY_VERSION_HEX < 0x02050000)
1311 if (PyInt_Check(item) || PyLong_Check(item)) {
1312 long i = PyInt_AsLong(item);
1313#else
1314 if (PyIndex_Check(item)) {
1315 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1316#endif
1317
1318 if (i == -1 && PyErr_Occurred()) {
1319 return -1;
1320 }
1321 if (i < 0 && self->extra)
1322 i += self->extra->length;
1323 return element_setitem(self_, i, value);
1324 }
1325 else if (PySlice_Check(item)) {
1326 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1327
1328 PyObject* recycle = NULL;
1329 PyObject* seq = NULL;
1330
1331 if (!self->extra)
1332 element_new_extra(self, NULL);
1333
1334 if (PySlice_GetIndicesEx((PySliceObject *)item,
1335 self->extra->length,
1336 &start, &stop, &step, &slicelen) < 0) {
1337 return -1;
1338 }
1339
1340 if (value == NULL)
1341 newlen = 0;
1342 else {
1343 seq = PySequence_Fast(value, "");
1344 if (!seq) {
1345 PyErr_Format(
1346 PyExc_TypeError,
1347 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1348 );
1349 return -1;
1350 }
1351 newlen = PySequence_Size(seq);
1352 }
1353
1354 if (step != 1 && newlen != slicelen)
1355 {
1356 PyErr_Format(PyExc_ValueError,
1357#if (PY_VERSION_HEX < 0x02050000)
1358 "attempt to assign sequence of size %d "
1359 "to extended slice of size %d",
1360#else
1361 "attempt to assign sequence of size %zd "
1362 "to extended slice of size %zd",
1363#endif
1364 newlen, slicelen
1365 );
1366 return -1;
1367 }
1368
1369
1370 /* Resize before creating the recycle bin, to prevent refleaks. */
1371 if (newlen > slicelen) {
1372 if (element_resize(self, newlen - slicelen) < 0) {
1373 if (seq) {
1374 Py_DECREF(seq);
1375 }
1376 return -1;
1377 }
1378 }
1379
1380 if (slicelen > 0) {
1381 /* to avoid recursive calls to this method (via decref), move
1382 old items to the recycle bin here, and get rid of them when
1383 we're done modifying the element */
1384 recycle = PyList_New(slicelen);
1385 if (!recycle) {
1386 if (seq) {
1387 Py_DECREF(seq);
1388 }
1389 return -1;
1390 }
1391 for (cur = start, i = 0; i < slicelen;
1392 cur += step, i++)
1393 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1394 }
1395
1396 if (newlen < slicelen) {
1397 /* delete slice */
1398 for (i = stop; i < self->extra->length; i++)
1399 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1400 } else if (newlen > slicelen) {
1401 /* insert slice */
1402 for (i = self->extra->length-1; i >= stop; i--)
1403 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1404 }
1405
1406 /* replace the slice */
1407 for (cur = start, i = 0; i < newlen;
1408 cur += step, i++) {
1409 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1410 Py_INCREF(element);
1411 self->extra->children[cur] = element;
1412 }
1413
1414 self->extra->length += newlen - slicelen;
1415
1416 if (seq) {
1417 Py_DECREF(seq);
1418 }
1419
1420 /* discard the recycle bin, and everything in it */
1421 Py_XDECREF(recycle);
1422
1423 return 0;
1424 }
1425 else {
1426 PyErr_SetString(PyExc_TypeError,
1427 "element indices must be integers");
1428 return -1;
1429 }
1430}
1431
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001432static PyMethodDef element_methods[] = {
1433
1434 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1435
1436 {"get", (PyCFunction) element_get, METH_VARARGS},
1437 {"set", (PyCFunction) element_set, METH_VARARGS},
1438
1439 {"find", (PyCFunction) element_find, METH_VARARGS},
1440 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1441 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1442
1443 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001444 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001445 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1446 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1447
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001448 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1449 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1450 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1451
1452 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001453 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1454
1455 {"items", (PyCFunction) element_items, METH_VARARGS},
1456 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1457
1458 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1459
1460 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1461 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1462
1463 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1464 C objects correctly, so we have to fake it using a __reduce__-
1465 based hack (see the element_reduce implementation above for
1466 details). */
1467
1468 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1469 using a runtime test to figure out if we need to fake things
1470 or now (see the init code below). The following entry is
1471 enabled only if the hack is needed. */
1472
1473 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1474
1475 {NULL, NULL}
1476};
1477
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001478static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001479element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001480{
1481 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001482 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001483
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001484 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001485 name = _PyUnicode_AsString(nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001486
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001487 /* handle common attributes first */
1488 if (strcmp(name, "tag") == 0) {
1489 res = self->tag;
1490 Py_INCREF(res);
1491 return res;
1492 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001493 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001494 Py_INCREF(res);
1495 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001496 }
1497
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001498 /* methods */
1499 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1500 if (res)
1501 return res;
1502
1503 /* less common attributes */
1504 if (strcmp(name, "tail") == 0) {
1505 PyErr_Clear();
1506 res = element_get_tail(self);
1507 } else if (strcmp(name, "attrib") == 0) {
1508 PyErr_Clear();
1509 if (!self->extra)
1510 element_new_extra(self, NULL);
1511 res = element_get_attrib(self);
1512 }
1513
1514 if (!res)
1515 return NULL;
1516
1517 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001518 return res;
1519}
1520
1521static int
1522element_setattr(ElementObject* self, const char* name, PyObject* value)
1523{
1524 if (value == NULL) {
1525 PyErr_SetString(
1526 PyExc_AttributeError,
1527 "can't delete element attributes"
1528 );
1529 return -1;
1530 }
1531
1532 if (strcmp(name, "tag") == 0) {
1533 Py_DECREF(self->tag);
1534 self->tag = value;
1535 Py_INCREF(self->tag);
1536 } else if (strcmp(name, "text") == 0) {
1537 Py_DECREF(JOIN_OBJ(self->text));
1538 self->text = value;
1539 Py_INCREF(self->text);
1540 } else if (strcmp(name, "tail") == 0) {
1541 Py_DECREF(JOIN_OBJ(self->tail));
1542 self->tail = value;
1543 Py_INCREF(self->tail);
1544 } else if (strcmp(name, "attrib") == 0) {
1545 if (!self->extra)
1546 element_new_extra(self, NULL);
1547 Py_DECREF(self->extra->attrib);
1548 self->extra->attrib = value;
1549 Py_INCREF(self->extra->attrib);
1550 } else {
1551 PyErr_SetString(PyExc_AttributeError, name);
1552 return -1;
1553 }
1554
1555 return 0;
1556}
1557
1558static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001559 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001560 0, /* sq_concat */
1561 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001562 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001563 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001564 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001565 0,
1566};
1567
1568static PyMappingMethods element_as_mapping = {
1569 (lenfunc) element_length,
1570 (binaryfunc) element_subscr,
1571 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001572};
1573
Neal Norwitz227b5332006-03-22 09:28:35 +00001574static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001575 PyVarObject_HEAD_INIT(NULL, 0)
1576 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001577 /* methods */
1578 (destructor)element_dealloc, /* tp_dealloc */
1579 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001580 0, /* tp_getattr */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001581 (setattrfunc)element_setattr, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00001582 0, /* tp_reserved */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001583 (reprfunc)element_repr, /* tp_repr */
1584 0, /* tp_as_number */
1585 &element_as_sequence, /* tp_as_sequence */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001586 &element_as_mapping, /* tp_as_mapping */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001587 0, /* tp_hash */
1588 0, /* tp_call */
1589 0, /* tp_str */
1590 (getattrofunc)element_getattro, /* tp_getattro */
1591 0, /* tp_setattro */
1592 0, /* tp_as_buffer */
1593 Py_TPFLAGS_DEFAULT, /* tp_flags */
1594 0, /* tp_doc */
1595 0, /* tp_traverse */
1596 0, /* tp_clear */
1597 0, /* tp_richcompare */
1598 0, /* tp_weaklistoffset */
1599 0, /* tp_iter */
1600 0, /* tp_iternext */
1601 element_methods, /* tp_methods */
1602 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001603};
1604
1605/* ==================================================================== */
1606/* the tree builder type */
1607
1608typedef struct {
1609 PyObject_HEAD
1610
1611 PyObject* root; /* root node (first created node) */
1612
1613 ElementObject* this; /* current node */
1614 ElementObject* last; /* most recently created node */
1615
1616 PyObject* data; /* data collector (string or list), or NULL */
1617
1618 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001619 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001620
1621 /* element tracing */
1622 PyObject* events; /* list of events, or NULL if not collecting */
1623 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1624 PyObject* end_event_obj;
1625 PyObject* start_ns_event_obj;
1626 PyObject* end_ns_event_obj;
1627
1628} TreeBuilderObject;
1629
Neal Norwitz227b5332006-03-22 09:28:35 +00001630static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001631
Christian Heimes90aa7642007-12-19 02:45:37 +00001632#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001633
1634/* -------------------------------------------------------------------- */
1635/* constructor and destructor */
1636
1637LOCAL(PyObject*)
1638treebuilder_new(void)
1639{
1640 TreeBuilderObject* self;
1641
1642 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1643 if (self == NULL)
1644 return NULL;
1645
1646 self->root = NULL;
1647
1648 Py_INCREF(Py_None);
1649 self->this = (ElementObject*) Py_None;
1650
1651 Py_INCREF(Py_None);
1652 self->last = (ElementObject*) Py_None;
1653
1654 self->data = NULL;
1655
1656 self->stack = PyList_New(20);
1657 self->index = 0;
1658
1659 self->events = NULL;
1660 self->start_event_obj = self->end_event_obj = NULL;
1661 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1662
1663 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1664
1665 return (PyObject*) self;
1666}
1667
1668static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001669treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001670{
1671 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1672 return NULL;
1673
1674 return treebuilder_new();
1675}
1676
1677static void
1678treebuilder_dealloc(TreeBuilderObject* self)
1679{
1680 Py_XDECREF(self->end_ns_event_obj);
1681 Py_XDECREF(self->start_ns_event_obj);
1682 Py_XDECREF(self->end_event_obj);
1683 Py_XDECREF(self->start_event_obj);
1684 Py_XDECREF(self->events);
1685 Py_DECREF(self->stack);
1686 Py_XDECREF(self->data);
1687 Py_DECREF(self->last);
1688 Py_DECREF(self->this);
1689 Py_XDECREF(self->root);
1690
1691 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1692
1693 PyObject_Del(self);
1694}
1695
1696/* -------------------------------------------------------------------- */
1697/* handlers */
1698
1699LOCAL(PyObject*)
1700treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1701 PyObject* standalone)
1702{
1703 Py_RETURN_NONE;
1704}
1705
1706LOCAL(PyObject*)
1707treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1708 PyObject* attrib)
1709{
1710 PyObject* node;
1711 PyObject* this;
1712
1713 if (self->data) {
1714 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001715 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001716 self->last->text = JOIN_SET(
1717 self->data, PyList_CheckExact(self->data)
1718 );
1719 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001720 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001721 self->last->tail = JOIN_SET(
1722 self->data, PyList_CheckExact(self->data)
1723 );
1724 }
1725 self->data = NULL;
1726 }
1727
1728 node = element_new(tag, attrib);
1729 if (!node)
1730 return NULL;
1731
1732 this = (PyObject*) self->this;
1733
1734 if (this != Py_None) {
1735 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001736 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001737 } else {
1738 if (self->root) {
1739 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001740 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001741 "multiple elements on top level"
1742 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001743 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001744 }
1745 Py_INCREF(node);
1746 self->root = node;
1747 }
1748
1749 if (self->index < PyList_GET_SIZE(self->stack)) {
1750 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001751 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001752 Py_INCREF(this);
1753 } else {
1754 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001755 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001756 }
1757 self->index++;
1758
1759 Py_DECREF(this);
1760 Py_INCREF(node);
1761 self->this = (ElementObject*) node;
1762
1763 Py_DECREF(self->last);
1764 Py_INCREF(node);
1765 self->last = (ElementObject*) node;
1766
1767 if (self->start_event_obj) {
1768 PyObject* res;
1769 PyObject* action = self->start_event_obj;
1770 res = PyTuple_New(2);
1771 if (res) {
1772 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1773 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1774 PyList_Append(self->events, res);
1775 Py_DECREF(res);
1776 } else
1777 PyErr_Clear(); /* FIXME: propagate error */
1778 }
1779
1780 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001781
1782 error:
1783 Py_DECREF(node);
1784 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001785}
1786
1787LOCAL(PyObject*)
1788treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1789{
1790 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001791 if (self->last == (ElementObject*) Py_None) {
1792 /* ignore calls to data before the first call to start */
1793 Py_RETURN_NONE;
1794 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001795 /* store the first item as is */
1796 Py_INCREF(data); self->data = data;
1797 } else {
1798 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001799 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1800 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001801 /* expat often generates single character data sections; handle
1802 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001803 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1804 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001805 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001806 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001807 } else if (PyList_CheckExact(self->data)) {
1808 if (PyList_Append(self->data, data) < 0)
1809 return NULL;
1810 } else {
1811 PyObject* list = PyList_New(2);
1812 if (!list)
1813 return NULL;
1814 PyList_SET_ITEM(list, 0, self->data);
1815 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1816 self->data = list;
1817 }
1818 }
1819
1820 Py_RETURN_NONE;
1821}
1822
1823LOCAL(PyObject*)
1824treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1825{
1826 PyObject* item;
1827
1828 if (self->data) {
1829 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001830 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001831 self->last->text = JOIN_SET(
1832 self->data, PyList_CheckExact(self->data)
1833 );
1834 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001835 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001836 self->last->tail = JOIN_SET(
1837 self->data, PyList_CheckExact(self->data)
1838 );
1839 }
1840 self->data = NULL;
1841 }
1842
1843 if (self->index == 0) {
1844 PyErr_SetString(
1845 PyExc_IndexError,
1846 "pop from empty stack"
1847 );
1848 return NULL;
1849 }
1850
1851 self->index--;
1852
1853 item = PyList_GET_ITEM(self->stack, self->index);
1854 Py_INCREF(item);
1855
1856 Py_DECREF(self->last);
1857
1858 self->last = (ElementObject*) self->this;
1859 self->this = (ElementObject*) item;
1860
1861 if (self->end_event_obj) {
1862 PyObject* res;
1863 PyObject* action = self->end_event_obj;
1864 PyObject* node = (PyObject*) self->last;
1865 res = PyTuple_New(2);
1866 if (res) {
1867 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1868 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1869 PyList_Append(self->events, res);
1870 Py_DECREF(res);
1871 } else
1872 PyErr_Clear(); /* FIXME: propagate error */
1873 }
1874
1875 Py_INCREF(self->last);
1876 return (PyObject*) self->last;
1877}
1878
1879LOCAL(void)
1880treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001881 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001882{
1883 PyObject* res;
1884 PyObject* action;
1885 PyObject* parcel;
1886
1887 if (!self->events)
1888 return;
1889
1890 if (start) {
1891 if (!self->start_ns_event_obj)
1892 return;
1893 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001894 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001895 if (!parcel)
1896 return;
1897 Py_INCREF(action);
1898 } else {
1899 if (!self->end_ns_event_obj)
1900 return;
1901 action = self->end_ns_event_obj;
1902 Py_INCREF(action);
1903 parcel = Py_None;
1904 Py_INCREF(parcel);
1905 }
1906
1907 res = PyTuple_New(2);
1908
1909 if (res) {
1910 PyTuple_SET_ITEM(res, 0, action);
1911 PyTuple_SET_ITEM(res, 1, parcel);
1912 PyList_Append(self->events, res);
1913 Py_DECREF(res);
1914 } else
1915 PyErr_Clear(); /* FIXME: propagate error */
1916}
1917
1918/* -------------------------------------------------------------------- */
1919/* methods (in alphabetical order) */
1920
1921static PyObject*
1922treebuilder_data(TreeBuilderObject* self, PyObject* args)
1923{
1924 PyObject* data;
1925 if (!PyArg_ParseTuple(args, "O:data", &data))
1926 return NULL;
1927
1928 return treebuilder_handle_data(self, data);
1929}
1930
1931static PyObject*
1932treebuilder_end(TreeBuilderObject* self, PyObject* args)
1933{
1934 PyObject* tag;
1935 if (!PyArg_ParseTuple(args, "O:end", &tag))
1936 return NULL;
1937
1938 return treebuilder_handle_end(self, tag);
1939}
1940
1941LOCAL(PyObject*)
1942treebuilder_done(TreeBuilderObject* self)
1943{
1944 PyObject* res;
1945
1946 /* FIXME: check stack size? */
1947
1948 if (self->root)
1949 res = self->root;
1950 else
1951 res = Py_None;
1952
1953 Py_INCREF(res);
1954 return res;
1955}
1956
1957static PyObject*
1958treebuilder_close(TreeBuilderObject* self, PyObject* args)
1959{
1960 if (!PyArg_ParseTuple(args, ":close"))
1961 return NULL;
1962
1963 return treebuilder_done(self);
1964}
1965
1966static PyObject*
1967treebuilder_start(TreeBuilderObject* self, PyObject* args)
1968{
1969 PyObject* tag;
1970 PyObject* attrib = Py_None;
1971 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1972 return NULL;
1973
1974 return treebuilder_handle_start(self, tag, attrib);
1975}
1976
1977static PyObject*
1978treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1979{
1980 PyObject* encoding;
1981 PyObject* standalone;
1982 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1983 return NULL;
1984
1985 return treebuilder_handle_xml(self, encoding, standalone);
1986}
1987
1988static PyMethodDef treebuilder_methods[] = {
1989 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1990 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1991 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1992 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1993 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1994 {NULL, NULL}
1995};
1996
Neal Norwitz227b5332006-03-22 09:28:35 +00001997static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001998 PyVarObject_HEAD_INIT(NULL, 0)
1999 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002000 /* methods */
2001 (destructor)treebuilder_dealloc, /* tp_dealloc */
2002 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002003 0, /* tp_getattr */
2004 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002005 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002006 0, /* tp_repr */
2007 0, /* tp_as_number */
2008 0, /* tp_as_sequence */
2009 0, /* tp_as_mapping */
2010 0, /* tp_hash */
2011 0, /* tp_call */
2012 0, /* tp_str */
2013 0, /* tp_getattro */
2014 0, /* tp_setattro */
2015 0, /* tp_as_buffer */
2016 Py_TPFLAGS_DEFAULT, /* tp_flags */
2017 0, /* tp_doc */
2018 0, /* tp_traverse */
2019 0, /* tp_clear */
2020 0, /* tp_richcompare */
2021 0, /* tp_weaklistoffset */
2022 0, /* tp_iter */
2023 0, /* tp_iternext */
2024 treebuilder_methods, /* tp_methods */
2025 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002026};
2027
2028/* ==================================================================== */
2029/* the expat interface */
2030
2031#if defined(USE_EXPAT)
2032
2033#include "expat.h"
2034
2035#if defined(USE_PYEXPAT_CAPI)
2036#include "pyexpat.h"
2037static struct PyExpat_CAPI* expat_capi;
2038#define EXPAT(func) (expat_capi->func)
2039#else
2040#define EXPAT(func) (XML_##func)
2041#endif
2042
2043typedef struct {
2044 PyObject_HEAD
2045
2046 XML_Parser parser;
2047
2048 PyObject* target;
2049 PyObject* entity;
2050
2051 PyObject* names;
2052
2053 PyObject* handle_xml;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002054
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002055 PyObject* handle_start;
2056 PyObject* handle_data;
2057 PyObject* handle_end;
2058
2059 PyObject* handle_comment;
2060 PyObject* handle_pi;
2061
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002062 PyObject* handle_close;
2063
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002064} XMLParserObject;
2065
Neal Norwitz227b5332006-03-22 09:28:35 +00002066static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002067
2068/* helpers */
2069
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002070LOCAL(PyObject*)
2071makeuniversal(XMLParserObject* self, const char* string)
2072{
2073 /* convert a UTF-8 tag/attribute name from the expat parser
2074 to a universal name string */
2075
2076 int size = strlen(string);
2077 PyObject* key;
2078 PyObject* value;
2079
2080 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002081 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002082 if (!key)
2083 return NULL;
2084
2085 value = PyDict_GetItem(self->names, key);
2086
2087 if (value) {
2088 Py_INCREF(value);
2089 } else {
2090 /* new name. convert to universal name, and decode as
2091 necessary */
2092
2093 PyObject* tag;
2094 char* p;
2095 int i;
2096
2097 /* look for namespace separator */
2098 for (i = 0; i < size; i++)
2099 if (string[i] == '}')
2100 break;
2101 if (i != size) {
2102 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002103 tag = PyBytes_FromStringAndSize(NULL, size+1);
2104 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002105 p[0] = '{';
2106 memcpy(p+1, string, size);
2107 size++;
2108 } else {
2109 /* plain name; use key as tag */
2110 Py_INCREF(key);
2111 tag = key;
2112 }
2113
2114 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002115 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002116 value = PyUnicode_DecodeUTF8(p, size, "strict");
2117 Py_DECREF(tag);
2118 if (!value) {
2119 Py_DECREF(key);
2120 return NULL;
2121 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002122
2123 /* add to names dictionary */
2124 if (PyDict_SetItem(self->names, key, value) < 0) {
2125 Py_DECREF(key);
2126 Py_DECREF(value);
2127 return NULL;
2128 }
2129 }
2130
2131 Py_DECREF(key);
2132 return value;
2133}
2134
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002135static void
2136expat_set_error(const char* message, int line, int column)
2137{
2138 PyObject *error;
2139 PyObject *position;
2140 char buffer[256];
2141
2142 sprintf(buffer, "%s: line %d, column %d", message, line, column);
2143
2144 error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
2145 if (!error)
2146 return;
2147
2148 /* add position attribute */
2149 position = Py_BuildValue("(ii)", line, column);
2150 if (!position) {
2151 Py_DECREF(error);
2152 return;
2153 }
2154 if (PyObject_SetAttrString(error, "position", position) == -1) {
2155 Py_DECREF(error);
2156 Py_DECREF(position);
2157 return;
2158 }
2159 Py_DECREF(position);
2160
2161 PyErr_SetObject(elementtree_parseerror_obj, error);
2162 Py_DECREF(error);
2163}
2164
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002165/* -------------------------------------------------------------------- */
2166/* handlers */
2167
2168static void
2169expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2170 int data_len)
2171{
2172 PyObject* key;
2173 PyObject* value;
2174 PyObject* res;
2175
2176 if (data_len < 2 || data_in[0] != '&')
2177 return;
2178
Neal Norwitz0269b912007-08-08 06:56:02 +00002179 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002180 if (!key)
2181 return;
2182
2183 value = PyDict_GetItem(self->entity, key);
2184
2185 if (value) {
2186 if (TreeBuilder_CheckExact(self->target))
2187 res = treebuilder_handle_data(
2188 (TreeBuilderObject*) self->target, value
2189 );
2190 else if (self->handle_data)
2191 res = PyObject_CallFunction(self->handle_data, "O", value);
2192 else
2193 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002194 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002195 } else if (!PyErr_Occurred()) {
2196 /* Report the first error, not the last */
2197 char message[128];
2198 sprintf(message, "undefined entity &%.100s;", _PyUnicode_AsString(key));
2199 expat_set_error(
2200 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002201 EXPAT(GetErrorLineNumber)(self->parser),
2202 EXPAT(GetErrorColumnNumber)(self->parser)
2203 );
2204 }
2205
2206 Py_DECREF(key);
2207}
2208
2209static void
2210expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2211 const XML_Char **attrib_in)
2212{
2213 PyObject* res;
2214 PyObject* tag;
2215 PyObject* attrib;
2216 int ok;
2217
2218 /* tag name */
2219 tag = makeuniversal(self, tag_in);
2220 if (!tag)
2221 return; /* parser will look for errors */
2222
2223 /* attributes */
2224 if (attrib_in[0]) {
2225 attrib = PyDict_New();
2226 if (!attrib)
2227 return;
2228 while (attrib_in[0] && attrib_in[1]) {
2229 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002230 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002231 if (!key || !value) {
2232 Py_XDECREF(value);
2233 Py_XDECREF(key);
2234 Py_DECREF(attrib);
2235 return;
2236 }
2237 ok = PyDict_SetItem(attrib, key, value);
2238 Py_DECREF(value);
2239 Py_DECREF(key);
2240 if (ok < 0) {
2241 Py_DECREF(attrib);
2242 return;
2243 }
2244 attrib_in += 2;
2245 }
2246 } else {
2247 Py_INCREF(Py_None);
2248 attrib = Py_None;
2249 }
2250
2251 if (TreeBuilder_CheckExact(self->target))
2252 /* shortcut */
2253 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2254 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002255 else if (self->handle_start) {
2256 if (attrib == Py_None) {
2257 Py_DECREF(attrib);
2258 attrib = PyDict_New();
2259 if (!attrib)
2260 return;
2261 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002262 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002263 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002264 res = NULL;
2265
2266 Py_DECREF(tag);
2267 Py_DECREF(attrib);
2268
2269 Py_XDECREF(res);
2270}
2271
2272static void
2273expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2274 int data_len)
2275{
2276 PyObject* data;
2277 PyObject* res;
2278
Neal Norwitz0269b912007-08-08 06:56:02 +00002279 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002280 if (!data)
2281 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002282
2283 if (TreeBuilder_CheckExact(self->target))
2284 /* shortcut */
2285 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2286 else if (self->handle_data)
2287 res = PyObject_CallFunction(self->handle_data, "O", data);
2288 else
2289 res = NULL;
2290
2291 Py_DECREF(data);
2292
2293 Py_XDECREF(res);
2294}
2295
2296static void
2297expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2298{
2299 PyObject* tag;
2300 PyObject* res = NULL;
2301
2302 if (TreeBuilder_CheckExact(self->target))
2303 /* shortcut */
2304 /* the standard tree builder doesn't look at the end tag */
2305 res = treebuilder_handle_end(
2306 (TreeBuilderObject*) self->target, Py_None
2307 );
2308 else if (self->handle_end) {
2309 tag = makeuniversal(self, tag_in);
2310 if (tag) {
2311 res = PyObject_CallFunction(self->handle_end, "O", tag);
2312 Py_DECREF(tag);
2313 }
2314 }
2315
2316 Py_XDECREF(res);
2317}
2318
2319static void
2320expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2321 const XML_Char *uri)
2322{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002323 PyObject* sprefix = NULL;
2324 PyObject* suri = NULL;
2325
2326 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2327 if (!suri)
2328 return;
2329
2330 if (prefix)
2331 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2332 else
2333 sprefix = PyUnicode_FromString("");
2334 if (!sprefix) {
2335 Py_DECREF(suri);
2336 return;
2337 }
2338
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002339 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002340 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002341 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002342
2343 Py_DECREF(sprefix);
2344 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002345}
2346
2347static void
2348expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2349{
2350 treebuilder_handle_namespace(
2351 (TreeBuilderObject*) self->target, 0, NULL, NULL
2352 );
2353}
2354
2355static void
2356expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2357{
2358 PyObject* comment;
2359 PyObject* res;
2360
2361 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002362 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002363 if (comment) {
2364 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2365 Py_XDECREF(res);
2366 Py_DECREF(comment);
2367 }
2368 }
2369}
2370
2371static void
2372expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2373 const XML_Char* data_in)
2374{
2375 PyObject* target;
2376 PyObject* data;
2377 PyObject* res;
2378
2379 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002380 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2381 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002382 if (target && data) {
2383 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2384 Py_XDECREF(res);
2385 Py_DECREF(data);
2386 Py_DECREF(target);
2387 } else {
2388 Py_XDECREF(data);
2389 Py_XDECREF(target);
2390 }
2391 }
2392}
2393
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002394static int
2395expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2396 XML_Encoding *info)
2397{
2398 PyObject* u;
2399 Py_UNICODE* p;
2400 unsigned char s[256];
2401 int i;
2402
2403 memset(info, 0, sizeof(XML_Encoding));
2404
2405 for (i = 0; i < 256; i++)
2406 s[i] = i;
2407
Fredrik Lundhc3389992005-12-25 11:40:19 +00002408 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002409 if (!u)
2410 return XML_STATUS_ERROR;
2411
2412 if (PyUnicode_GET_SIZE(u) != 256) {
2413 Py_DECREF(u);
2414 return XML_STATUS_ERROR;
2415 }
2416
2417 p = PyUnicode_AS_UNICODE(u);
2418
2419 for (i = 0; i < 256; i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002420 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2421 info->map[i] = p[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002422 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002423 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002424 }
2425
2426 Py_DECREF(u);
2427
2428 return XML_STATUS_OK;
2429}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002430
2431/* -------------------------------------------------------------------- */
2432/* constructor and destructor */
2433
2434static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002435xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002436{
2437 XMLParserObject* self;
2438 /* FIXME: does this need to be static? */
2439 static XML_Memory_Handling_Suite memory_handler;
2440
2441 PyObject* target = NULL;
2442 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002443 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002444 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2445 &target, &encoding))
2446 return NULL;
2447
2448#if defined(USE_PYEXPAT_CAPI)
2449 if (!expat_capi) {
2450 PyErr_SetString(
2451 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2452 );
2453 return NULL;
2454 }
2455#endif
2456
2457 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2458 if (self == NULL)
2459 return NULL;
2460
2461 self->entity = PyDict_New();
2462 if (!self->entity) {
2463 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002464 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002465 }
2466
2467 self->names = PyDict_New();
2468 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002469 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002470 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002471 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002472 }
2473
2474 memory_handler.malloc_fcn = PyObject_Malloc;
2475 memory_handler.realloc_fcn = PyObject_Realloc;
2476 memory_handler.free_fcn = PyObject_Free;
2477
2478 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2479 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002480 PyObject_Del(self->names);
2481 PyObject_Del(self->entity);
2482 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002483 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002484 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002485 }
2486
2487 /* setup target handlers */
2488 if (!target) {
2489 target = treebuilder_new();
2490 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002491 EXPAT(ParserFree)(self->parser);
2492 PyObject_Del(self->names);
2493 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002494 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002495 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002496 }
2497 } else
2498 Py_INCREF(target);
2499 self->target = target;
2500
2501 self->handle_xml = PyObject_GetAttrString(target, "xml");
2502 self->handle_start = PyObject_GetAttrString(target, "start");
2503 self->handle_data = PyObject_GetAttrString(target, "data");
2504 self->handle_end = PyObject_GetAttrString(target, "end");
2505 self->handle_comment = PyObject_GetAttrString(target, "comment");
2506 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002507 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002508
2509 PyErr_Clear();
2510
2511 /* configure parser */
2512 EXPAT(SetUserData)(self->parser, self);
2513 EXPAT(SetElementHandler)(
2514 self->parser,
2515 (XML_StartElementHandler) expat_start_handler,
2516 (XML_EndElementHandler) expat_end_handler
2517 );
2518 EXPAT(SetDefaultHandlerExpand)(
2519 self->parser,
2520 (XML_DefaultHandler) expat_default_handler
2521 );
2522 EXPAT(SetCharacterDataHandler)(
2523 self->parser,
2524 (XML_CharacterDataHandler) expat_data_handler
2525 );
2526 if (self->handle_comment)
2527 EXPAT(SetCommentHandler)(
2528 self->parser,
2529 (XML_CommentHandler) expat_comment_handler
2530 );
2531 if (self->handle_pi)
2532 EXPAT(SetProcessingInstructionHandler)(
2533 self->parser,
2534 (XML_ProcessingInstructionHandler) expat_pi_handler
2535 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002536 EXPAT(SetUnknownEncodingHandler)(
2537 self->parser,
2538 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2539 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002540
2541 ALLOC(sizeof(XMLParserObject), "create expatparser");
2542
2543 return (PyObject*) self;
2544}
2545
2546static void
2547xmlparser_dealloc(XMLParserObject* self)
2548{
2549 EXPAT(ParserFree)(self->parser);
2550
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002551 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002552 Py_XDECREF(self->handle_pi);
2553 Py_XDECREF(self->handle_comment);
2554 Py_XDECREF(self->handle_end);
2555 Py_XDECREF(self->handle_data);
2556 Py_XDECREF(self->handle_start);
2557 Py_XDECREF(self->handle_xml);
2558
2559 Py_DECREF(self->target);
2560 Py_DECREF(self->entity);
2561 Py_DECREF(self->names);
2562
2563 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2564
2565 PyObject_Del(self);
2566}
2567
2568/* -------------------------------------------------------------------- */
2569/* methods (in alphabetical order) */
2570
2571LOCAL(PyObject*)
2572expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2573{
2574 int ok;
2575
2576 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2577
2578 if (PyErr_Occurred())
2579 return NULL;
2580
2581 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002582 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002583 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2584 EXPAT(GetErrorLineNumber)(self->parser),
2585 EXPAT(GetErrorColumnNumber)(self->parser)
2586 );
2587 return NULL;
2588 }
2589
2590 Py_RETURN_NONE;
2591}
2592
2593static PyObject*
2594xmlparser_close(XMLParserObject* self, PyObject* args)
2595{
2596 /* end feeding data to parser */
2597
2598 PyObject* res;
2599 if (!PyArg_ParseTuple(args, ":close"))
2600 return NULL;
2601
2602 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002603 if (!res)
2604 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002605
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002606 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002607 Py_DECREF(res);
2608 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002609 } if (self->handle_close) {
2610 Py_DECREF(res);
2611 return PyObject_CallFunction(self->handle_close, "");
2612 } else
2613 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002614}
2615
2616static PyObject*
2617xmlparser_feed(XMLParserObject* self, PyObject* args)
2618{
2619 /* feed data to parser */
2620
2621 char* data;
2622 int data_len;
2623 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2624 return NULL;
2625
2626 return expat_parse(self, data, data_len, 0);
2627}
2628
2629static PyObject*
2630xmlparser_parse(XMLParserObject* self, PyObject* args)
2631{
2632 /* (internal) parse until end of input stream */
2633
2634 PyObject* reader;
2635 PyObject* buffer;
2636 PyObject* res;
2637
2638 PyObject* fileobj;
2639 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2640 return NULL;
2641
2642 reader = PyObject_GetAttrString(fileobj, "read");
2643 if (!reader)
2644 return NULL;
2645
2646 /* read from open file object */
2647 for (;;) {
2648
2649 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2650
2651 if (!buffer) {
2652 /* read failed (e.g. due to KeyboardInterrupt) */
2653 Py_DECREF(reader);
2654 return NULL;
2655 }
2656
Christian Heimes72b710a2008-05-26 13:28:38 +00002657 if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002658 Py_DECREF(buffer);
2659 break;
2660 }
2661
2662 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002663 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002664 );
2665
2666 Py_DECREF(buffer);
2667
2668 if (!res) {
2669 Py_DECREF(reader);
2670 return NULL;
2671 }
2672 Py_DECREF(res);
2673
2674 }
2675
2676 Py_DECREF(reader);
2677
2678 res = expat_parse(self, "", 0, 1);
2679
2680 if (res && TreeBuilder_CheckExact(self->target)) {
2681 Py_DECREF(res);
2682 return treebuilder_done((TreeBuilderObject*) self->target);
2683 }
2684
2685 return res;
2686}
2687
2688static PyObject*
2689xmlparser_setevents(XMLParserObject* self, PyObject* args)
2690{
2691 /* activate element event reporting */
2692
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002693 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002694 TreeBuilderObject* target;
2695
2696 PyObject* events; /* event collector */
2697 PyObject* event_set = Py_None;
2698 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2699 &event_set))
2700 return NULL;
2701
2702 if (!TreeBuilder_CheckExact(self->target)) {
2703 PyErr_SetString(
2704 PyExc_TypeError,
2705 "event handling only supported for cElementTree.Treebuilder "
2706 "targets"
2707 );
2708 return NULL;
2709 }
2710
2711 target = (TreeBuilderObject*) self->target;
2712
2713 Py_INCREF(events);
2714 Py_XDECREF(target->events);
2715 target->events = events;
2716
2717 /* clear out existing events */
2718 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2719 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2720 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2721 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2722
2723 if (event_set == Py_None) {
2724 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002725 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002726 Py_RETURN_NONE;
2727 }
2728
2729 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2730 goto error;
2731
2732 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2733 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2734 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002735 if (PyUnicode_Check(item)) {
2736 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002737 if (event == NULL)
2738 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002739 } else if (PyBytes_Check(item))
2740 event = PyBytes_AS_STRING(item);
2741 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002743 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744 if (strcmp(event, "start") == 0) {
2745 Py_INCREF(item);
2746 target->start_event_obj = item;
2747 } else if (strcmp(event, "end") == 0) {
2748 Py_INCREF(item);
2749 Py_XDECREF(target->end_event_obj);
2750 target->end_event_obj = item;
2751 } else if (strcmp(event, "start-ns") == 0) {
2752 Py_INCREF(item);
2753 Py_XDECREF(target->start_ns_event_obj);
2754 target->start_ns_event_obj = item;
2755 EXPAT(SetNamespaceDeclHandler)(
2756 self->parser,
2757 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2758 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2759 );
2760 } else if (strcmp(event, "end-ns") == 0) {
2761 Py_INCREF(item);
2762 Py_XDECREF(target->end_ns_event_obj);
2763 target->end_ns_event_obj = item;
2764 EXPAT(SetNamespaceDeclHandler)(
2765 self->parser,
2766 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2767 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2768 );
2769 } else {
2770 PyErr_Format(
2771 PyExc_ValueError,
2772 "unknown event '%s'", event
2773 );
2774 return NULL;
2775 }
2776 }
2777
2778 Py_RETURN_NONE;
2779
2780 error:
2781 PyErr_SetString(
2782 PyExc_TypeError,
2783 "invalid event tuple"
2784 );
2785 return NULL;
2786}
2787
2788static PyMethodDef xmlparser_methods[] = {
2789 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2790 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2791 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2792 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2793 {NULL, NULL}
2794};
2795
2796static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002797xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002798{
2799 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002800 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002801
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002802 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002803 name = _PyUnicode_AsString(nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002804
2805 PyErr_Clear();
2806
2807 if (strcmp(name, "entity") == 0)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002808 res = self->entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002809 else if (strcmp(name, "target") == 0)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002810 res = self->target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002811 else if (strcmp(name, "version") == 0) {
2812 char buffer[100];
2813 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2814 XML_MINOR_VERSION, XML_MICRO_VERSION);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002815 return PyUnicode_DecodeUTF8(buffer, strlen(buffer), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002816 } else {
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002817 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002818 }
2819
2820 Py_INCREF(res);
2821 return res;
2822}
2823
Neal Norwitz227b5332006-03-22 09:28:35 +00002824static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002825 PyVarObject_HEAD_INIT(NULL, 0)
2826 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002827 /* methods */
2828 (destructor)xmlparser_dealloc, /* tp_dealloc */
2829 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002830 0, /* tp_getattr */
2831 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002832 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002833 0, /* tp_repr */
2834 0, /* tp_as_number */
2835 0, /* tp_as_sequence */
2836 0, /* tp_as_mapping */
2837 0, /* tp_hash */
2838 0, /* tp_call */
2839 0, /* tp_str */
2840 (getattrofunc)xmlparser_getattro, /* tp_getattro */
2841 0, /* tp_setattro */
2842 0, /* tp_as_buffer */
2843 Py_TPFLAGS_DEFAULT, /* tp_flags */
2844 0, /* tp_doc */
2845 0, /* tp_traverse */
2846 0, /* tp_clear */
2847 0, /* tp_richcompare */
2848 0, /* tp_weaklistoffset */
2849 0, /* tp_iter */
2850 0, /* tp_iternext */
2851 xmlparser_methods, /* tp_methods */
2852 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002853};
2854
2855#endif
2856
2857/* ==================================================================== */
2858/* python module interface */
2859
2860static PyMethodDef _functions[] = {
2861 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2862 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2863 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2864#if defined(USE_EXPAT)
2865 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2866 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2867#endif
2868 {NULL, NULL}
2869};
2870
Martin v. Löwis1a214512008-06-11 05:26:20 +00002871
2872static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002873 PyModuleDef_HEAD_INIT,
2874 "_elementtree",
2875 NULL,
2876 -1,
2877 _functions,
2878 NULL,
2879 NULL,
2880 NULL,
2881 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002882};
2883
Neal Norwitzf6657e62006-12-28 04:47:50 +00002884PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002885PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002886{
2887 PyObject* m;
2888 PyObject* g;
2889 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002890
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002891 /* Initialize object types */
2892 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002893 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002894 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002895 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002896#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002897 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002898 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002899#endif
2900
Martin v. Löwis1a214512008-06-11 05:26:20 +00002901 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002902 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002903 return NULL;
2904
2905 /* The code below requires that the module gets already added
2906 to sys.modules. */
2907 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002908 _elementtreemodule.m_name,
2909 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002910
2911 /* python glue code */
2912
2913 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002914 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002915 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002916
2917 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2918
2919 bootstrap = (
2920
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002921 "from copy import copy, deepcopy\n"
2922
2923 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002924 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002925 "except ImportError:\n"
2926 " import ElementTree\n"
2927 "ET = ElementTree\n"
2928 "del ElementTree\n"
2929
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002930 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002931
2932 "try:\n" /* check if copy works as is */
2933 " copy(cElementTree.Element('x'))\n"
2934 "except:\n"
2935 " def copyelement(elem):\n"
2936 " return elem\n"
2937
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002938 "class CommentProxy:\n"
2939 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002940 " element = cElementTree.Element(ET.Comment)\n"
2941 " element.text = text\n"
2942 " return element\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002943 " def __eq__(self, other):\n"
2944 " return ET.Comment == other\n"
2945 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002946
2947 "class ElementTree(ET.ElementTree):\n" /* public */
2948 " def parse(self, source, parser=None):\n"
2949 " if not hasattr(source, 'read'):\n"
2950 " source = open(source, 'rb')\n"
2951 " if parser is not None:\n"
2952 " while 1:\n"
2953 " data = source.read(65536)\n"
2954 " if not data:\n"
2955 " break\n"
2956 " parser.feed(data)\n"
2957 " self._root = parser.close()\n"
2958 " else:\n"
2959 " parser = cElementTree.XMLParser()\n"
2960 " self._root = parser._parse(source)\n"
2961 " return self._root\n"
2962 "cElementTree.ElementTree = ElementTree\n"
2963
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002964 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002965 " if tag == '*':\n"
2966 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002967 " if tag is None or node.tag == tag:\n"
2968 " yield node\n"
2969 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002970 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002971 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002972
2973 "def itertext(node):\n" /* helper */
2974 " if node.text:\n"
2975 " yield node.text\n"
2976 " for e in node:\n"
2977 " for s in e.itertext():\n"
2978 " yield s\n"
2979 " if e.tail:\n"
2980 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002981
2982 "def parse(source, parser=None):\n" /* public */
2983 " tree = ElementTree()\n"
2984 " tree.parse(source, parser)\n"
2985 " return tree\n"
2986 "cElementTree.parse = parse\n"
2987
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002988 "class iterparse:\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002989 " root = None\n"
2990 " def __init__(self, file, events=None):\n"
2991 " if not hasattr(file, 'read'):\n"
2992 " file = open(file, 'rb')\n"
2993 " self._file = file\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002994 " self._events = []\n"
2995 " self._index = 0\n"
2996 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002997 " b = cElementTree.TreeBuilder()\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002998 " self._parser = cElementTree.XMLParser(b)\n"
2999 " self._parser._setevents(self._events, events)\n"
3000 " def __next__(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003001 " while 1:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003002 " try:\n"
3003 " item = self._events[self._index]\n"
3004 " except IndexError:\n"
3005 " if self._parser is None:\n"
3006 " self.root = self._root\n"
3007 " raise StopIteration\n"
3008 " # load event buffer\n"
3009 " del self._events[:]\n"
3010 " self._index = 0\n"
3011 " data = self._file.read(16384)\n"
3012 " if data:\n"
3013 " self._parser.feed(data)\n"
3014 " else:\n"
3015 " self._root = self._parser.close()\n"
3016 " self._parser = None\n"
3017 " else:\n"
3018 " self._index = self._index + 1\n"
3019 " return item\n"
3020 " def __iter__(self):\n"
3021 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003022 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003023
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003024 "class PIProxy:\n"
3025 " def __call__(self, target, text=None):\n"
3026 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003027 " element.text = target\n"
3028 " if text:\n"
3029 " element.text = element.text + ' ' + text\n"
3030 " return element\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003031 " def __eq__(self, other):\n"
3032 " return ET.PI == other\n"
3033 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003034
3035 "def XML(text):\n" /* public */
3036 " parser = cElementTree.XMLParser()\n"
3037 " parser.feed(text)\n"
3038 " return parser.close()\n"
3039 "cElementTree.XML = cElementTree.fromstring = XML\n"
3040
3041 "def XMLID(text):\n" /* public */
3042 " tree = XML(text)\n"
3043 " ids = {}\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003044 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003045 " id = elem.get('id')\n"
3046 " if id:\n"
3047 " ids[id] = elem\n"
3048 " return tree, ids\n"
3049 "cElementTree.XMLID = XMLID\n"
3050
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003051 "try:\n"
3052 " register_namespace = ET.register_namespace\n"
3053 "except AttributeError:\n"
3054 " def register_namespace(prefix, uri):\n"
3055 " ET._namespace_map[uri] = prefix\n"
3056 "cElementTree.register_namespace = register_namespace\n"
3057
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003058 "cElementTree.dump = ET.dump\n"
3059 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3060 "cElementTree.iselement = ET.iselement\n"
3061 "cElementTree.QName = ET.QName\n"
3062 "cElementTree.tostring = ET.tostring\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003063 "cElementTree.fromstringlist = ET.fromstringlist\n"
3064 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003065 "cElementTree.VERSION = '" VERSION "'\n"
3066 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003067
3068 );
3069
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003070 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3071 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003072
3073 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3074
3075 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3076 if (elementtree_copyelement_obj) {
3077 /* reduce hack needed; enable reduce method */
3078 PyMethodDef* mp;
3079 for (mp = element_methods; mp->ml_name; mp++)
3080 if (mp->ml_meth == (PyCFunction) element_reduce) {
3081 mp->ml_name = "__reduce__";
3082 break;
3083 }
3084 } else
3085 PyErr_Clear();
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003086
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003087 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003088 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3089 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003090
3091#if defined(USE_PYEXPAT_CAPI)
3092 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003093 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3094 if (expat_capi) {
3095 /* check that it's usable */
3096 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3097 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3098 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3099 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3100 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3101 expat_capi = NULL;
3102 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003103#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003104
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003105 elementtree_parseerror_obj = PyErr_NewException(
3106 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3107 );
3108 Py_INCREF(elementtree_parseerror_obj);
3109 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3110
3111 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003112}