blob: 684081c7c926db489778003b5975cbfff3fcb32c [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000036 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000038 *
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000039 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000041 *
42 * info@pythonware.com
43 * http://www.pythonware.com
44 */
45
Fredrik Lundh6d52b552005-12-16 22:06:43 +000046/* Licensed to PSF under a Contributor Agreement. */
47/* See http://www.python.org/2.4/license for licensing details. */
48
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000049#include "Python.h"
50
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Leave defined to include the expat-based XMLParser type */
#define USE_EXPAT

/* Define to make all expat calls go via pyexpat's embedded expat library */
/* #define USE_PYEXPAT_CAPI */

/* An element can hold this many children without extra memory
   allocations. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current version of cElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
75
76/* -------------------------------------------------------------------- */
77
/* optional allocation tracing.  when the "#if 0" below is enabled,
   ALLOC and RELEASE adjust the running byte count in 'memory' and
   print it together with the given comment; otherwise both macros
   expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
88
/* compiler tweaks */
/* LOCAL(type) introduces a file-private (static) function returning
   'type'.  under MSVC it also adds __inline and __fastcall. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
95
/* compatibility macros */

/* before 2.5: supply Py_ssize_t as a plain int, and map lenfunc to
   inquiry */
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

/* before 2.4 (and, nested, before 2.2): map the exact type checks
   onto the plain type checks */
#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check
#if (PY_VERSION_HEX < 0x02020000)
#define PyList_CheckExact PyList_Check
#define PyString_CheckExact PyString_Check
#if (PY_VERSION_HEX >= 0x01060000)
#define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
#endif
#endif
#endif

/* supply Py_RETURN_NONE if the Python headers do not define it */
#if !defined(Py_RETURN_NONE)
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
116
/* macros used to store 'join' flags in string object pointers. note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ. see comments in the ElementObject definition for more
   info. */

/* JOIN_GET: read the flag kept in the lowest bit of the pointer */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
/* JOIN_SET: strip any flag already on p, then or in 'flag' */
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
/* JOIN_OBJ: clear the lowest bit to get the real object pointer */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
124
/* glue functions (see the init function for details) */
/* the deepcopy, getiterator and copyelement objects are tested for
   NULL before use (a RuntimeError is raised if missing); elementpath_obj
   is called by find/findtext/findall without such a test */
static PyObject* elementtree_copyelement_obj;
static PyObject* elementtree_deepcopy_obj;
static PyObject* elementtree_getiterator_obj;
static PyObject* elementpath_obj;
130
131/* helpers */
132
133LOCAL(PyObject*)
134deepcopy(PyObject* object, PyObject* memo)
135{
136 /* do a deep copy of the given object */
137
138 PyObject* args;
139 PyObject* result;
140
141 if (!elementtree_deepcopy_obj) {
142 PyErr_SetString(
143 PyExc_RuntimeError,
144 "deepcopy helper not found"
145 );
146 return NULL;
147 }
148
149 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000150 if (!args)
151 return NULL;
152
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000153 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
154 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
155
156 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
157
158 Py_DECREF(args);
159
160 return result;
161}
162
163LOCAL(PyObject*)
164list_join(PyObject* list)
165{
166 /* join list elements (destroying the list in the process) */
167
168 PyObject* joiner;
169 PyObject* function;
170 PyObject* args;
171 PyObject* result;
172
173 switch (PyList_GET_SIZE(list)) {
174 case 0:
175 Py_DECREF(list);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000176 return PyString_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000177 case 1:
178 result = PyList_GET_ITEM(list, 0);
179 Py_INCREF(result);
180 Py_DECREF(list);
181 return result;
182 }
183
184 /* two or more elements: slice out a suitable separator from the
185 first member, and use that to join the entire list */
186
187 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
188 if (!joiner)
189 return NULL;
190
191 function = PyObject_GetAttrString(joiner, "join");
192 if (!function) {
193 Py_DECREF(joiner);
194 return NULL;
195 }
196
197 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000198 if (!args)
199 return NULL;
200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000201 PyTuple_SET_ITEM(args, 0, list);
202
203 result = PyObject_CallObject(function, args);
204
205 Py_DECREF(args); /* also removes list */
206 Py_DECREF(function);
207 Py_DECREF(joiner);
208
209 return result;
210}
211
212#if (PY_VERSION_HEX < 0x02020000)
213LOCAL(int)
214PyDict_Update(PyObject* dict, PyObject* other)
215{
216 /* PyDict_Update emulation for 2.1 and earlier */
217
218 PyObject* res;
219
220 res = PyObject_CallMethod(dict, "update", "O", other);
221 if (!res)
222 return -1;
223
224 Py_DECREF(res);
225 return 0;
226}
227#endif
228
229/* -------------------------------------------------------------------- */
230/* the element type */
231
/* optional per-element storage for attributes and children; reached
   through the 'extra' pointer in ElementObject, which is NULL until
   this section has been allocated */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    int length; /* actual number of items */
    int allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* built-in storage for up to STATIC_CHILDREN children */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
247
/* the element object itself.  attributes and children live in the
   separately allocated 'extra' section. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children, or NULL if not yet allocated */
    ElementObjectExtra* extra;

} ElementObject;
269
/* forward declaration of the element type object */
staticforward PyTypeObject Element_Type;

/* true if op's type is exactly Element_Type (the type pointers are
   compared, so subtypes do not match) */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273
274/* -------------------------------------------------------------------- */
275/* element constructor and destructor */
276
277LOCAL(int)
278element_new_extra(ElementObject* self, PyObject* attrib)
279{
280 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
281 if (!self->extra)
282 return -1;
283
284 if (!attrib)
285 attrib = Py_None;
286
287 Py_INCREF(attrib);
288 self->extra->attrib = attrib;
289
290 self->extra->length = 0;
291 self->extra->allocated = STATIC_CHILDREN;
292 self->extra->children = self->extra->_children;
293
294 return 0;
295}
296
297LOCAL(void)
298element_dealloc_extra(ElementObject* self)
299{
300 int i;
301
302 Py_DECREF(self->extra->attrib);
303
304 for (i = 0; i < self->extra->length; i++)
305 Py_DECREF(self->extra->children[i]);
306
307 if (self->extra->children != self->extra->_children)
308 PyObject_Free(self->extra->children);
309
310 PyObject_Free(self->extra);
311}
312
313LOCAL(PyObject*)
314element_new(PyObject* tag, PyObject* attrib)
315{
316 ElementObject* self;
317
318 self = PyObject_New(ElementObject, &Element_Type);
319 if (self == NULL)
320 return NULL;
321
322 /* use None for empty dictionaries */
323 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
324 attrib = Py_None;
325
326 self->extra = NULL;
327
328 if (attrib != Py_None) {
329
Neal Norwitzc6a989a2006-05-10 06:57:58 +0000330 if (element_new_extra(self, attrib) < 0) {
331 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000332 return NULL;
Neal Norwitzc6a989a2006-05-10 06:57:58 +0000333 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000334
335 self->extra->length = 0;
336 self->extra->allocated = STATIC_CHILDREN;
337 self->extra->children = self->extra->_children;
338
339 }
340
341 Py_INCREF(tag);
342 self->tag = tag;
343
344 Py_INCREF(Py_None);
345 self->text = Py_None;
346
347 Py_INCREF(Py_None);
348 self->tail = Py_None;
349
350 ALLOC(sizeof(ElementObject), "create element");
351
352 return (PyObject*) self;
353}
354
355LOCAL(int)
356element_resize(ElementObject* self, int extra)
357{
358 int size;
359 PyObject* *children;
360
361 /* make sure self->children can hold the given number of extra
362 elements. set an exception and return -1 if allocation failed */
363
364 if (!self->extra)
365 element_new_extra(self, NULL);
366
367 size = self->extra->length + extra;
368
369 if (size > self->extra->allocated) {
370 /* use Python 2.4's list growth strategy */
371 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000372 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
373 * which needs at least 4 bytes.
374 * Although it's a false alarm always assume at least one child to
375 * be safe.
376 */
377 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000378 if (self->extra->children != self->extra->_children) {
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000379 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
380 * "children", which needs at least 4 bytes. Although it's a
381 * false alarm always assume at least one child to be safe.
382 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000383 children = PyObject_Realloc(self->extra->children,
384 size * sizeof(PyObject*));
385 if (!children)
386 goto nomemory;
387 } else {
388 children = PyObject_Malloc(size * sizeof(PyObject*));
389 if (!children)
390 goto nomemory;
391 /* copy existing children from static area to malloc buffer */
392 memcpy(children, self->extra->children,
393 self->extra->length * sizeof(PyObject*));
394 }
395 self->extra->children = children;
396 self->extra->allocated = size;
397 }
398
399 return 0;
400
401 nomemory:
402 PyErr_NoMemory();
403 return -1;
404}
405
406LOCAL(int)
407element_add_subelement(ElementObject* self, PyObject* element)
408{
409 /* add a child element to a parent */
410
411 if (element_resize(self, 1) < 0)
412 return -1;
413
414 Py_INCREF(element);
415 self->extra->children[self->extra->length] = element;
416
417 self->extra->length++;
418
419 return 0;
420}
421
422LOCAL(PyObject*)
423element_get_attrib(ElementObject* self)
424{
425 /* return borrowed reference to attrib dictionary */
426 /* note: this function assumes that the extra section exists */
427
428 PyObject* res = self->extra->attrib;
429
430 if (res == Py_None) {
431 /* create missing dictionary */
432 res = PyDict_New();
433 if (!res)
434 return NULL;
435 self->extra->attrib = res;
436 }
437
438 return res;
439}
440
441LOCAL(PyObject*)
442element_get_text(ElementObject* self)
443{
444 /* return borrowed reference to text attribute */
445
446 PyObject* res = self->text;
447
448 if (JOIN_GET(res)) {
449 res = JOIN_OBJ(res);
450 if (PyList_CheckExact(res)) {
451 res = list_join(res);
452 if (!res)
453 return NULL;
454 self->text = res;
455 }
456 }
457
458 return res;
459}
460
461LOCAL(PyObject*)
462element_get_tail(ElementObject* self)
463{
464 /* return borrowed reference to text attribute */
465
466 PyObject* res = self->tail;
467
468 if (JOIN_GET(res)) {
469 res = JOIN_OBJ(res);
470 if (PyList_CheckExact(res)) {
471 res = list_join(res);
472 if (!res)
473 return NULL;
474 self->tail = res;
475 }
476 }
477
478 return res;
479}
480
481static PyObject*
482element(PyObject* self, PyObject* args, PyObject* kw)
483{
484 PyObject* elem;
485
486 PyObject* tag;
487 PyObject* attrib = NULL;
488 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
489 &PyDict_Type, &attrib))
490 return NULL;
491
492 if (attrib || kw) {
493 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
494 if (!attrib)
495 return NULL;
496 if (kw)
497 PyDict_Update(attrib, kw);
498 } else {
499 Py_INCREF(Py_None);
500 attrib = Py_None;
501 }
502
503 elem = element_new(tag, attrib);
504
505 Py_DECREF(attrib);
506
507 return elem;
508}
509
510static PyObject*
511subelement(PyObject* self, PyObject* args, PyObject* kw)
512{
513 PyObject* elem;
514
515 ElementObject* parent;
516 PyObject* tag;
517 PyObject* attrib = NULL;
518 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
519 &Element_Type, &parent, &tag,
520 &PyDict_Type, &attrib))
521 return NULL;
522
523 if (attrib || kw) {
524 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
525 if (!attrib)
526 return NULL;
527 if (kw)
528 PyDict_Update(attrib, kw);
529 } else {
530 Py_INCREF(Py_None);
531 attrib = Py_None;
532 }
533
534 elem = element_new(tag, attrib);
535
536 Py_DECREF(attrib);
537
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000538 if (element_add_subelement(parent, elem) < 0) {
539 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000540 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000541 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000542
543 return elem;
544}
545
546static void
547element_dealloc(ElementObject* self)
548{
549 if (self->extra)
550 element_dealloc_extra(self);
551
552 /* discard attributes */
553 Py_DECREF(self->tag);
554 Py_DECREF(JOIN_OBJ(self->text));
555 Py_DECREF(JOIN_OBJ(self->tail));
556
557 RELEASE(sizeof(ElementObject), "destroy element");
558
559 PyObject_Del(self);
560}
561
562/* -------------------------------------------------------------------- */
563/* methods (in alphabetical order) */
564
565static PyObject*
566element_append(ElementObject* self, PyObject* args)
567{
568 PyObject* element;
569 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
570 return NULL;
571
572 if (element_add_subelement(self, element) < 0)
573 return NULL;
574
575 Py_RETURN_NONE;
576}
577
578static PyObject*
579element_clear(ElementObject* self, PyObject* args)
580{
581 if (!PyArg_ParseTuple(args, ":clear"))
582 return NULL;
583
584 if (self->extra) {
585 element_dealloc_extra(self);
586 self->extra = NULL;
587 }
588
589 Py_INCREF(Py_None);
590 Py_DECREF(JOIN_OBJ(self->text));
591 self->text = Py_None;
592
593 Py_INCREF(Py_None);
594 Py_DECREF(JOIN_OBJ(self->tail));
595 self->tail = Py_None;
596
597 Py_RETURN_NONE;
598}
599
600static PyObject*
601element_copy(ElementObject* self, PyObject* args)
602{
603 int i;
604 ElementObject* element;
605
606 if (!PyArg_ParseTuple(args, ":__copy__"))
607 return NULL;
608
609 element = (ElementObject*) element_new(
610 self->tag, (self->extra) ? self->extra->attrib : Py_None
611 );
612 if (!element)
613 return NULL;
614
615 Py_DECREF(JOIN_OBJ(element->text));
616 element->text = self->text;
617 Py_INCREF(JOIN_OBJ(element->text));
618
619 Py_DECREF(JOIN_OBJ(element->tail));
620 element->tail = self->tail;
621 Py_INCREF(JOIN_OBJ(element->tail));
622
623 if (self->extra) {
624
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000625 if (element_resize(element, self->extra->length) < 0) {
626 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000628 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000629
630 for (i = 0; i < self->extra->length; i++) {
631 Py_INCREF(self->extra->children[i]);
632 element->extra->children[i] = self->extra->children[i];
633 }
634
635 element->extra->length = self->extra->length;
636
637 }
638
639 return (PyObject*) element;
640}
641
642static PyObject*
643element_deepcopy(ElementObject* self, PyObject* args)
644{
645 int i;
646 ElementObject* element;
647 PyObject* tag;
648 PyObject* attrib;
649 PyObject* text;
650 PyObject* tail;
651 PyObject* id;
652
653 PyObject* memo;
654 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
655 return NULL;
656
657 tag = deepcopy(self->tag, memo);
658 if (!tag)
659 return NULL;
660
661 if (self->extra) {
662 attrib = deepcopy(self->extra->attrib, memo);
663 if (!attrib) {
664 Py_DECREF(tag);
665 return NULL;
666 }
667 } else {
668 Py_INCREF(Py_None);
669 attrib = Py_None;
670 }
671
672 element = (ElementObject*) element_new(tag, attrib);
673
674 Py_DECREF(tag);
675 Py_DECREF(attrib);
676
677 if (!element)
678 return NULL;
679
680 text = deepcopy(JOIN_OBJ(self->text), memo);
681 if (!text)
682 goto error;
683 Py_DECREF(element->text);
684 element->text = JOIN_SET(text, JOIN_GET(self->text));
685
686 tail = deepcopy(JOIN_OBJ(self->tail), memo);
687 if (!tail)
688 goto error;
689 Py_DECREF(element->tail);
690 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
691
692 if (self->extra) {
693
694 if (element_resize(element, self->extra->length) < 0)
695 goto error;
696
697 for (i = 0; i < self->extra->length; i++) {
698 PyObject* child = deepcopy(self->extra->children[i], memo);
699 if (!child) {
700 element->extra->length = i;
701 goto error;
702 }
703 element->extra->children[i] = child;
704 }
705
706 element->extra->length = self->extra->length;
707
708 }
709
710 /* add object to memo dictionary (so deepcopy won't visit it again) */
711 id = PyInt_FromLong((Py_uintptr_t) self);
712
713 i = PyDict_SetItem(memo, id, (PyObject*) element);
714
715 Py_DECREF(id);
716
717 if (i < 0)
718 goto error;
719
720 return (PyObject*) element;
721
722 error:
723 Py_DECREF(element);
724 return NULL;
725}
726
727LOCAL(int)
728checkpath(PyObject* tag)
729{
Neal Norwitzc7074382006-06-12 02:06:17 +0000730 Py_ssize_t i;
731 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000732
733 /* check if a tag contains an xpath character */
734
735#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
736
737#if defined(Py_USING_UNICODE)
738 if (PyUnicode_Check(tag)) {
739 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
740 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
741 if (p[i] == '{')
742 check = 0;
743 else if (p[i] == '}')
744 check = 1;
745 else if (check && PATHCHAR(p[i]))
746 return 1;
747 }
748 return 0;
749 }
750#endif
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000751 if (PyString_Check(tag)) {
752 char *p = PyString_AS_STRING(tag);
753 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000754 if (p[i] == '{')
755 check = 0;
756 else if (p[i] == '}')
757 check = 1;
758 else if (check && PATHCHAR(p[i]))
759 return 1;
760 }
761 return 0;
762 }
763
764 return 1; /* unknown type; might be path expression */
765}
766
767static PyObject*
768element_find(ElementObject* self, PyObject* args)
769{
770 int i;
771
772 PyObject* tag;
773 if (!PyArg_ParseTuple(args, "O:find", &tag))
774 return NULL;
775
776 if (checkpath(tag))
777 return PyObject_CallMethod(
778 elementpath_obj, "find", "OO", self, tag
779 );
780
781 if (!self->extra)
782 Py_RETURN_NONE;
783
784 for (i = 0; i < self->extra->length; i++) {
785 PyObject* item = self->extra->children[i];
786 if (Element_CheckExact(item) &&
787 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
788 Py_INCREF(item);
789 return item;
790 }
791 }
792
793 Py_RETURN_NONE;
794}
795
796static PyObject*
797element_findtext(ElementObject* self, PyObject* args)
798{
799 int i;
800
801 PyObject* tag;
802 PyObject* default_value = Py_None;
803 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
804 return NULL;
805
806 if (checkpath(tag))
807 return PyObject_CallMethod(
808 elementpath_obj, "findtext", "OOO", self, tag, default_value
809 );
810
811 if (!self->extra) {
812 Py_INCREF(default_value);
813 return default_value;
814 }
815
816 for (i = 0; i < self->extra->length; i++) {
817 ElementObject* item = (ElementObject*) self->extra->children[i];
818 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
819 PyObject* text = element_get_text(item);
820 if (text == Py_None)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000821 return PyString_FromString("");
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000822 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000823 return text;
824 }
825 }
826
827 Py_INCREF(default_value);
828 return default_value;
829}
830
831static PyObject*
832element_findall(ElementObject* self, PyObject* args)
833{
834 int i;
835 PyObject* out;
836
837 PyObject* tag;
838 if (!PyArg_ParseTuple(args, "O:findall", &tag))
839 return NULL;
840
841 if (checkpath(tag))
842 return PyObject_CallMethod(
843 elementpath_obj, "findall", "OO", self, tag
844 );
845
846 out = PyList_New(0);
847 if (!out)
848 return NULL;
849
850 if (!self->extra)
851 return out;
852
853 for (i = 0; i < self->extra->length; i++) {
854 PyObject* item = self->extra->children[i];
855 if (Element_CheckExact(item) &&
856 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
857 if (PyList_Append(out, item) < 0) {
858 Py_DECREF(out);
859 return NULL;
860 }
861 }
862 }
863
864 return out;
865}
866
867static PyObject*
868element_get(ElementObject* self, PyObject* args)
869{
870 PyObject* value;
871
872 PyObject* key;
873 PyObject* default_value = Py_None;
874 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
875 return NULL;
876
877 if (!self->extra || self->extra->attrib == Py_None)
878 value = default_value;
879 else {
880 value = PyDict_GetItem(self->extra->attrib, key);
881 if (!value)
882 value = default_value;
883 }
884
885 Py_INCREF(value);
886 return value;
887}
888
889static PyObject*
890element_getchildren(ElementObject* self, PyObject* args)
891{
892 int i;
893 PyObject* list;
894
895 if (!PyArg_ParseTuple(args, ":getchildren"))
896 return NULL;
897
898 if (!self->extra)
899 return PyList_New(0);
900
901 list = PyList_New(self->extra->length);
902 if (!list)
903 return NULL;
904
905 for (i = 0; i < self->extra->length; i++) {
906 PyObject* item = self->extra->children[i];
907 Py_INCREF(item);
908 PyList_SET_ITEM(list, i, item);
909 }
910
911 return list;
912}
913
914static PyObject*
915element_getiterator(ElementObject* self, PyObject* args)
916{
917 PyObject* result;
918
919 PyObject* tag = Py_None;
920 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
921 return NULL;
922
923 if (!elementtree_getiterator_obj) {
924 PyErr_SetString(
925 PyExc_RuntimeError,
926 "getiterator helper not found"
927 );
928 return NULL;
929 }
930
931 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000932 if (!args)
933 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000934
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000935 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
936 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
937
938 result = PyObject_CallObject(elementtree_getiterator_obj, args);
939
940 Py_DECREF(args);
941
942 return result;
943}
944
945static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000946element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000947{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000948 ElementObject* self = (ElementObject*) self_;
949
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000950 if (!self->extra || index < 0 || index >= self->extra->length) {
951 PyErr_SetString(
952 PyExc_IndexError,
953 "child index out of range"
954 );
955 return NULL;
956 }
957
958 Py_INCREF(self->extra->children[index]);
959 return self->extra->children[index];
960}
961
962static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000963element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000964{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000965 ElementObject* self = (ElementObject*) self_;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000966 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000967 PyObject* list;
968
969 if (!self->extra)
970 return PyList_New(0);
971
972 /* standard clamping */
973 if (start < 0)
974 start = 0;
975 if (end < 0)
976 end = 0;
977 if (end > self->extra->length)
978 end = self->extra->length;
979 if (start > end)
980 start = end;
981
982 list = PyList_New(end - start);
983 if (!list)
984 return NULL;
985
986 for (i = start; i < end; i++) {
987 PyObject* item = self->extra->children[i];
988 Py_INCREF(item);
989 PyList_SET_ITEM(list, i - start, item);
990 }
991
992 return list;
993}
994
995static PyObject*
996element_insert(ElementObject* self, PyObject* args)
997{
998 int i;
999
1000 int index;
1001 PyObject* element;
1002 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1003 &Element_Type, &element))
1004 return NULL;
1005
1006 if (!self->extra)
1007 element_new_extra(self, NULL);
1008
1009 if (index < 0)
1010 index = 0;
1011 if (index > self->extra->length)
1012 index = self->extra->length;
1013
1014 if (element_resize(self, 1) < 0)
1015 return NULL;
1016
1017 for (i = self->extra->length; i > index; i--)
1018 self->extra->children[i] = self->extra->children[i-1];
1019
1020 Py_INCREF(element);
1021 self->extra->children[index] = element;
1022
1023 self->extra->length++;
1024
1025 Py_RETURN_NONE;
1026}
1027
1028static PyObject*
1029element_items(ElementObject* self, PyObject* args)
1030{
1031 if (!PyArg_ParseTuple(args, ":items"))
1032 return NULL;
1033
1034 if (!self->extra || self->extra->attrib == Py_None)
1035 return PyList_New(0);
1036
1037 return PyDict_Items(self->extra->attrib);
1038}
1039
1040static PyObject*
1041element_keys(ElementObject* self, PyObject* args)
1042{
1043 if (!PyArg_ParseTuple(args, ":keys"))
1044 return NULL;
1045
1046 if (!self->extra || self->extra->attrib == Py_None)
1047 return PyList_New(0);
1048
1049 return PyDict_Keys(self->extra->attrib);
1050}
1051
Martin v. Löwis18e16552006-02-15 17:27:45 +00001052static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001053element_length(ElementObject* self)
1054{
1055 if (!self->extra)
1056 return 0;
1057
1058 return self->extra->length;
1059}
1060
1061static PyObject*
1062element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1063{
1064 PyObject* elem;
1065
1066 PyObject* tag;
1067 PyObject* attrib;
1068 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1069 return NULL;
1070
1071 attrib = PyDict_Copy(attrib);
1072 if (!attrib)
1073 return NULL;
1074
1075 elem = element_new(tag, attrib);
1076
1077 Py_DECREF(attrib);
1078
1079 return elem;
1080}
1081
1082static PyObject*
1083element_reduce(ElementObject* self, PyObject* args)
1084{
1085 if (!PyArg_ParseTuple(args, ":__reduce__"))
1086 return NULL;
1087
1088 /* Hack alert: This method is used to work around a __copy__
1089 problem on certain 2.3 and 2.4 versions. To save time and
1090 simplify the code, we create the copy in here, and use a dummy
1091 copyelement helper to trick the copy module into doing the
1092 right thing. */
1093
1094 if (!elementtree_copyelement_obj) {
1095 PyErr_SetString(
1096 PyExc_RuntimeError,
1097 "copyelement helper not found"
1098 );
1099 return NULL;
1100 }
1101
1102 return Py_BuildValue(
1103 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1104 );
1105}
1106
1107static PyObject*
1108element_remove(ElementObject* self, PyObject* args)
1109{
1110 int i;
1111
1112 PyObject* element;
1113 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1114 return NULL;
1115
1116 if (!self->extra) {
1117 /* element has no children, so raise exception */
1118 PyErr_SetString(
1119 PyExc_ValueError,
1120 "list.remove(x): x not in list"
1121 );
1122 return NULL;
1123 }
1124
1125 for (i = 0; i < self->extra->length; i++) {
1126 if (self->extra->children[i] == element)
1127 break;
1128 if (PyObject_Compare(self->extra->children[i], element) == 0)
1129 break;
1130 }
1131
1132 if (i == self->extra->length) {
1133 /* element is not in children, so raise exception */
1134 PyErr_SetString(
1135 PyExc_ValueError,
1136 "list.remove(x): x not in list"
1137 );
1138 return NULL;
1139 }
1140
1141 Py_DECREF(self->extra->children[i]);
1142
1143 self->extra->length--;
1144
1145 for (; i < self->extra->length; i++)
1146 self->extra->children[i] = self->extra->children[i+1];
1147
1148 Py_RETURN_NONE;
1149}
1150
1151static PyObject*
1152element_repr(ElementObject* self)
1153{
1154 PyObject* repr;
1155 char buffer[100];
1156
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001157 repr = PyString_FromString("<Element ");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001158
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001159 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001160
1161 sprintf(buffer, " at %p>", self);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001162 PyString_ConcatAndDel(&repr, PyString_FromString(buffer));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001163
1164 return repr;
1165}
1166
1167static PyObject*
1168element_set(ElementObject* self, PyObject* args)
1169{
1170 PyObject* attrib;
1171
1172 PyObject* key;
1173 PyObject* value;
1174 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1175 return NULL;
1176
1177 if (!self->extra)
1178 element_new_extra(self, NULL);
1179
1180 attrib = element_get_attrib(self);
1181 if (!attrib)
1182 return NULL;
1183
1184 if (PyDict_SetItem(attrib, key, value) < 0)
1185 return NULL;
1186
1187 Py_RETURN_NONE;
1188}
1189
1190static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001191element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001192{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001193 ElementObject* self = (ElementObject*) self_;
Neal Norwitzc7074382006-06-12 02:06:17 +00001194 Py_ssize_t i, new, old;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001195 PyObject* recycle = NULL;
1196
1197 if (!self->extra)
1198 element_new_extra(self, NULL);
1199
1200 /* standard clamping */
1201 if (start < 0)
1202 start = 0;
1203 if (end < 0)
1204 end = 0;
1205 if (end > self->extra->length)
1206 end = self->extra->length;
1207 if (start > end)
1208 start = end;
1209
1210 old = end - start;
1211
1212 if (item == NULL)
1213 new = 0;
1214 else if (PyList_CheckExact(item)) {
1215 new = PyList_GET_SIZE(item);
1216 } else {
1217 /* FIXME: support arbitrary sequences? */
1218 PyErr_Format(
1219 PyExc_TypeError,
Christian Heimese93237d2007-12-19 02:37:44 +00001220 "expected list, not \"%.200s\"", Py_TYPE(item)->tp_name
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001221 );
1222 return -1;
1223 }
1224
1225 if (old > 0) {
1226 /* to avoid recursive calls to this method (via decref), move
1227 old items to the recycle bin here, and get rid of them when
1228 we're done modifying the element */
1229 recycle = PyList_New(old);
1230 for (i = 0; i < old; i++)
1231 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1232 }
1233
1234 if (new < old) {
1235 /* delete slice */
1236 for (i = end; i < self->extra->length; i++)
1237 self->extra->children[i + new - old] = self->extra->children[i];
1238 } else if (new > old) {
1239 /* insert slice */
1240 if (element_resize(self, new - old) < 0)
1241 return -1;
1242 for (i = self->extra->length-1; i >= end; i--)
1243 self->extra->children[i + new - old] = self->extra->children[i];
1244 }
1245
1246 /* replace the slice */
1247 for (i = 0; i < new; i++) {
1248 PyObject* element = PyList_GET_ITEM(item, i);
1249 Py_INCREF(element);
1250 self->extra->children[i + start] = element;
1251 }
1252
1253 self->extra->length += new - old;
1254
1255 /* discard the recycle bin, and everything in it */
1256 Py_XDECREF(recycle);
1257
1258 return 0;
1259}
1260
1261static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001262element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001263{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001264 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001265 int i;
1266 PyObject* old;
1267
1268 if (!self->extra || index < 0 || index >= self->extra->length) {
1269 PyErr_SetString(
1270 PyExc_IndexError,
1271 "child assignment index out of range");
1272 return -1;
1273 }
1274
1275 old = self->extra->children[index];
1276
1277 if (item) {
1278 Py_INCREF(item);
1279 self->extra->children[index] = item;
1280 } else {
1281 self->extra->length--;
1282 for (i = index; i < self->extra->length; i++)
1283 self->extra->children[i] = self->extra->children[i+1];
1284 }
1285
1286 Py_DECREF(old);
1287
1288 return 0;
1289}
1290
1291static PyMethodDef element_methods[] = {
1292
1293 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1294
1295 {"get", (PyCFunction) element_get, METH_VARARGS},
1296 {"set", (PyCFunction) element_set, METH_VARARGS},
1297
1298 {"find", (PyCFunction) element_find, METH_VARARGS},
1299 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1300 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1301
1302 {"append", (PyCFunction) element_append, METH_VARARGS},
1303 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1304 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1305
1306 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1307 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1308
1309 {"items", (PyCFunction) element_items, METH_VARARGS},
1310 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1311
1312 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1313
1314 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1315 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1316
1317 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1318 C objects correctly, so we have to fake it using a __reduce__-
1319 based hack (see the element_reduce implementation above for
1320 details). */
1321
1322 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1323 using a runtime test to figure out if we need to fake things
1324 or now (see the init code below). The following entry is
1325 enabled only if the hack is needed. */
1326
1327 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1328
1329 {NULL, NULL}
1330};
1331
1332static PyObject*
1333element_getattr(ElementObject* self, char* name)
1334{
1335 PyObject* res;
1336
1337 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1338 if (res)
1339 return res;
1340
1341 PyErr_Clear();
1342
1343 if (strcmp(name, "tag") == 0)
1344 res = self->tag;
1345 else if (strcmp(name, "text") == 0)
1346 res = element_get_text(self);
1347 else if (strcmp(name, "tail") == 0) {
1348 res = element_get_tail(self);
1349 } else if (strcmp(name, "attrib") == 0) {
1350 if (!self->extra)
1351 element_new_extra(self, NULL);
1352 res = element_get_attrib(self);
1353 } else {
1354 PyErr_SetString(PyExc_AttributeError, name);
1355 return NULL;
1356 }
1357
1358 if (!res)
1359 return NULL;
1360
1361 Py_INCREF(res);
1362 return res;
1363}
1364
1365static int
1366element_setattr(ElementObject* self, const char* name, PyObject* value)
1367{
1368 if (value == NULL) {
1369 PyErr_SetString(
1370 PyExc_AttributeError,
1371 "can't delete element attributes"
1372 );
1373 return -1;
1374 }
1375
1376 if (strcmp(name, "tag") == 0) {
1377 Py_DECREF(self->tag);
1378 self->tag = value;
1379 Py_INCREF(self->tag);
1380 } else if (strcmp(name, "text") == 0) {
1381 Py_DECREF(JOIN_OBJ(self->text));
1382 self->text = value;
1383 Py_INCREF(self->text);
1384 } else if (strcmp(name, "tail") == 0) {
1385 Py_DECREF(JOIN_OBJ(self->tail));
1386 self->tail = value;
1387 Py_INCREF(self->tail);
1388 } else if (strcmp(name, "attrib") == 0) {
1389 if (!self->extra)
1390 element_new_extra(self, NULL);
1391 Py_DECREF(self->extra->attrib);
1392 self->extra->attrib = value;
1393 Py_INCREF(self->extra->attrib);
1394 } else {
1395 PyErr_SetString(PyExc_AttributeError, name);
1396 return -1;
1397 }
1398
1399 return 0;
1400}
1401
1402static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001403 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001404 0, /* sq_concat */
1405 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001406 element_getitem,
1407 element_getslice,
1408 element_setitem,
1409 element_setslice,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001410};
1411
1412statichere PyTypeObject Element_Type = {
1413 PyObject_HEAD_INIT(NULL)
1414 0, "Element", sizeof(ElementObject), 0,
1415 /* methods */
1416 (destructor)element_dealloc, /* tp_dealloc */
1417 0, /* tp_print */
1418 (getattrfunc)element_getattr, /* tp_getattr */
1419 (setattrfunc)element_setattr, /* tp_setattr */
1420 0, /* tp_compare */
1421 (reprfunc)element_repr, /* tp_repr */
1422 0, /* tp_as_number */
1423 &element_as_sequence, /* tp_as_sequence */
1424};
1425
1426/* ==================================================================== */
1427/* the tree builder type */
1428
1429typedef struct {
1430 PyObject_HEAD
1431
1432 PyObject* root; /* root node (first created node) */
1433
1434 ElementObject* this; /* current node */
1435 ElementObject* last; /* most recently created node */
1436
1437 PyObject* data; /* data collector (string or list), or NULL */
1438
1439 PyObject* stack; /* element stack */
Neal Norwitzc7074382006-06-12 02:06:17 +00001440 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001441
1442 /* element tracing */
1443 PyObject* events; /* list of events, or NULL if not collecting */
1444 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1445 PyObject* end_event_obj;
1446 PyObject* start_ns_event_obj;
1447 PyObject* end_ns_event_obj;
1448
1449} TreeBuilderObject;
1450
1451staticforward PyTypeObject TreeBuilder_Type;
1452
Christian Heimese93237d2007-12-19 02:37:44 +00001453#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001454
1455/* -------------------------------------------------------------------- */
1456/* constructor and destructor */
1457
1458LOCAL(PyObject*)
1459treebuilder_new(void)
1460{
1461 TreeBuilderObject* self;
1462
1463 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1464 if (self == NULL)
1465 return NULL;
1466
1467 self->root = NULL;
1468
1469 Py_INCREF(Py_None);
1470 self->this = (ElementObject*) Py_None;
1471
1472 Py_INCREF(Py_None);
1473 self->last = (ElementObject*) Py_None;
1474
1475 self->data = NULL;
1476
1477 self->stack = PyList_New(20);
1478 self->index = 0;
1479
1480 self->events = NULL;
1481 self->start_event_obj = self->end_event_obj = NULL;
1482 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1483
1484 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1485
1486 return (PyObject*) self;
1487}
1488
1489static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00001490treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001491{
1492 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1493 return NULL;
1494
1495 return treebuilder_new();
1496}
1497
1498static void
1499treebuilder_dealloc(TreeBuilderObject* self)
1500{
1501 Py_XDECREF(self->end_ns_event_obj);
1502 Py_XDECREF(self->start_ns_event_obj);
1503 Py_XDECREF(self->end_event_obj);
1504 Py_XDECREF(self->start_event_obj);
1505 Py_XDECREF(self->events);
1506 Py_DECREF(self->stack);
1507 Py_XDECREF(self->data);
1508 Py_DECREF(self->last);
1509 Py_DECREF(self->this);
1510 Py_XDECREF(self->root);
1511
1512 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1513
1514 PyObject_Del(self);
1515}
1516
1517/* -------------------------------------------------------------------- */
1518/* handlers */
1519
1520LOCAL(PyObject*)
1521treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1522 PyObject* standalone)
1523{
1524 Py_RETURN_NONE;
1525}
1526
1527LOCAL(PyObject*)
1528treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1529 PyObject* attrib)
1530{
1531 PyObject* node;
1532 PyObject* this;
1533
1534 if (self->data) {
1535 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001536 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001537 self->last->text = JOIN_SET(
1538 self->data, PyList_CheckExact(self->data)
1539 );
1540 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001541 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001542 self->last->tail = JOIN_SET(
1543 self->data, PyList_CheckExact(self->data)
1544 );
1545 }
1546 self->data = NULL;
1547 }
1548
1549 node = element_new(tag, attrib);
1550 if (!node)
1551 return NULL;
1552
1553 this = (PyObject*) self->this;
1554
1555 if (this != Py_None) {
1556 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001557 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001558 } else {
1559 if (self->root) {
1560 PyErr_SetString(
1561 PyExc_SyntaxError,
1562 "multiple elements on top level"
1563 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001564 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001565 }
1566 Py_INCREF(node);
1567 self->root = node;
1568 }
1569
1570 if (self->index < PyList_GET_SIZE(self->stack)) {
1571 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001572 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001573 Py_INCREF(this);
1574 } else {
1575 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001576 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001577 }
1578 self->index++;
1579
1580 Py_DECREF(this);
1581 Py_INCREF(node);
1582 self->this = (ElementObject*) node;
1583
1584 Py_DECREF(self->last);
1585 Py_INCREF(node);
1586 self->last = (ElementObject*) node;
1587
1588 if (self->start_event_obj) {
1589 PyObject* res;
1590 PyObject* action = self->start_event_obj;
1591 res = PyTuple_New(2);
1592 if (res) {
1593 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1594 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1595 PyList_Append(self->events, res);
1596 Py_DECREF(res);
1597 } else
1598 PyErr_Clear(); /* FIXME: propagate error */
1599 }
1600
1601 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001602
1603 error:
1604 Py_DECREF(node);
1605 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001606}
1607
1608LOCAL(PyObject*)
1609treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1610{
1611 if (!self->data) {
Fredrik Lundhdc075b92006-08-16 16:47:07 +00001612 if (self->last == (ElementObject*) Py_None) {
1613 /* ignore calls to data before the first call to start */
1614 Py_RETURN_NONE;
1615 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001616 /* store the first item as is */
1617 Py_INCREF(data); self->data = data;
1618 } else {
1619 /* more than one item; use a list to collect items */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001620 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1621 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001622 /* expat often generates single character data sections; handle
1623 the most common case by resizing the existing string... */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001624 Py_ssize_t size = PyString_GET_SIZE(self->data);
1625 if (_PyString_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001626 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001627 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001628 } else if (PyList_CheckExact(self->data)) {
1629 if (PyList_Append(self->data, data) < 0)
1630 return NULL;
1631 } else {
1632 PyObject* list = PyList_New(2);
1633 if (!list)
1634 return NULL;
1635 PyList_SET_ITEM(list, 0, self->data);
1636 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1637 self->data = list;
1638 }
1639 }
1640
1641 Py_RETURN_NONE;
1642}
1643
1644LOCAL(PyObject*)
1645treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1646{
1647 PyObject* item;
1648
1649 if (self->data) {
1650 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001651 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001652 self->last->text = JOIN_SET(
1653 self->data, PyList_CheckExact(self->data)
1654 );
1655 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001656 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001657 self->last->tail = JOIN_SET(
1658 self->data, PyList_CheckExact(self->data)
1659 );
1660 }
1661 self->data = NULL;
1662 }
1663
1664 if (self->index == 0) {
1665 PyErr_SetString(
1666 PyExc_IndexError,
1667 "pop from empty stack"
1668 );
1669 return NULL;
1670 }
1671
1672 self->index--;
1673
1674 item = PyList_GET_ITEM(self->stack, self->index);
1675 Py_INCREF(item);
1676
1677 Py_DECREF(self->last);
1678
1679 self->last = (ElementObject*) self->this;
1680 self->this = (ElementObject*) item;
1681
1682 if (self->end_event_obj) {
1683 PyObject* res;
1684 PyObject* action = self->end_event_obj;
1685 PyObject* node = (PyObject*) self->last;
1686 res = PyTuple_New(2);
1687 if (res) {
1688 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1689 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1690 PyList_Append(self->events, res);
1691 Py_DECREF(res);
1692 } else
1693 PyErr_Clear(); /* FIXME: propagate error */
1694 }
1695
1696 Py_INCREF(self->last);
1697 return (PyObject*) self->last;
1698}
1699
1700LOCAL(void)
1701treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1702 const char* prefix, const char *uri)
1703{
1704 PyObject* res;
1705 PyObject* action;
1706 PyObject* parcel;
1707
1708 if (!self->events)
1709 return;
1710
1711 if (start) {
1712 if (!self->start_ns_event_obj)
1713 return;
1714 action = self->start_ns_event_obj;
1715 /* FIXME: prefix and uri use utf-8 encoding! */
1716 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1717 if (!parcel)
1718 return;
1719 Py_INCREF(action);
1720 } else {
1721 if (!self->end_ns_event_obj)
1722 return;
1723 action = self->end_ns_event_obj;
1724 Py_INCREF(action);
1725 parcel = Py_None;
1726 Py_INCREF(parcel);
1727 }
1728
1729 res = PyTuple_New(2);
1730
1731 if (res) {
1732 PyTuple_SET_ITEM(res, 0, action);
1733 PyTuple_SET_ITEM(res, 1, parcel);
1734 PyList_Append(self->events, res);
1735 Py_DECREF(res);
1736 } else
1737 PyErr_Clear(); /* FIXME: propagate error */
1738}
1739
1740/* -------------------------------------------------------------------- */
1741/* methods (in alphabetical order) */
1742
1743static PyObject*
1744treebuilder_data(TreeBuilderObject* self, PyObject* args)
1745{
1746 PyObject* data;
1747 if (!PyArg_ParseTuple(args, "O:data", &data))
1748 return NULL;
1749
1750 return treebuilder_handle_data(self, data);
1751}
1752
1753static PyObject*
1754treebuilder_end(TreeBuilderObject* self, PyObject* args)
1755{
1756 PyObject* tag;
1757 if (!PyArg_ParseTuple(args, "O:end", &tag))
1758 return NULL;
1759
1760 return treebuilder_handle_end(self, tag);
1761}
1762
1763LOCAL(PyObject*)
1764treebuilder_done(TreeBuilderObject* self)
1765{
1766 PyObject* res;
1767
1768 /* FIXME: check stack size? */
1769
1770 if (self->root)
1771 res = self->root;
1772 else
1773 res = Py_None;
1774
1775 Py_INCREF(res);
1776 return res;
1777}
1778
1779static PyObject*
1780treebuilder_close(TreeBuilderObject* self, PyObject* args)
1781{
1782 if (!PyArg_ParseTuple(args, ":close"))
1783 return NULL;
1784
1785 return treebuilder_done(self);
1786}
1787
1788static PyObject*
1789treebuilder_start(TreeBuilderObject* self, PyObject* args)
1790{
1791 PyObject* tag;
1792 PyObject* attrib = Py_None;
1793 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1794 return NULL;
1795
1796 return treebuilder_handle_start(self, tag, attrib);
1797}
1798
1799static PyObject*
1800treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1801{
1802 PyObject* encoding;
1803 PyObject* standalone;
1804 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1805 return NULL;
1806
1807 return treebuilder_handle_xml(self, encoding, standalone);
1808}
1809
1810static PyMethodDef treebuilder_methods[] = {
1811 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1812 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1813 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1814 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1815 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1816 {NULL, NULL}
1817};
1818
1819static PyObject*
1820treebuilder_getattr(TreeBuilderObject* self, char* name)
1821{
1822 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1823}
1824
1825statichere PyTypeObject TreeBuilder_Type = {
1826 PyObject_HEAD_INIT(NULL)
1827 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
1828 /* methods */
1829 (destructor)treebuilder_dealloc, /* tp_dealloc */
1830 0, /* tp_print */
1831 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1832};
1833
1834/* ==================================================================== */
1835/* the expat interface */
1836
1837#if defined(USE_EXPAT)
1838
1839#include "expat.h"
1840
/* EXPAT(func) names an expat entry point: a member of the pyexpat
   dispatch table when USE_PYEXPAT_CAPI is defined, the XML_-prefixed
   function otherwise */
#ifdef USE_PYEXPAT_CAPI
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
1848
1849typedef struct {
1850 PyObject_HEAD
1851
1852 XML_Parser parser;
1853
1854 PyObject* target;
1855 PyObject* entity;
1856
1857 PyObject* names;
1858
1859 PyObject* handle_xml;
1860 PyObject* handle_start;
1861 PyObject* handle_data;
1862 PyObject* handle_end;
1863
1864 PyObject* handle_comment;
1865 PyObject* handle_pi;
1866
1867} XMLParserObject;
1868
1869staticforward PyTypeObject XMLParser_Type;
1870
1871/* helpers */
1872
1873#if defined(Py_USING_UNICODE)
1874LOCAL(int)
1875checkstring(const char* string, int size)
1876{
1877 int i;
1878
1879 /* check if an 8-bit string contains UTF-8 characters */
1880 for (i = 0; i < size; i++)
1881 if (string[i] & 0x80)
1882 return 1;
1883
1884 return 0;
1885}
1886#endif
1887
1888LOCAL(PyObject*)
1889makestring(const char* string, int size)
1890{
1891 /* convert a UTF-8 string to either a 7-bit ascii string or a
1892 Unicode string */
1893
1894#if defined(Py_USING_UNICODE)
1895 if (checkstring(string, size))
1896 return PyUnicode_DecodeUTF8(string, size, "strict");
1897#endif
1898
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001899 return PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001900}
1901
1902LOCAL(PyObject*)
1903makeuniversal(XMLParserObject* self, const char* string)
1904{
1905 /* convert a UTF-8 tag/attribute name from the expat parser
1906 to a universal name string */
1907
1908 int size = strlen(string);
1909 PyObject* key;
1910 PyObject* value;
1911
1912 /* look the 'raw' name up in the names dictionary */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001913 key = PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001914 if (!key)
1915 return NULL;
1916
1917 value = PyDict_GetItem(self->names, key);
1918
1919 if (value) {
1920 Py_INCREF(value);
1921 } else {
1922 /* new name. convert to universal name, and decode as
1923 necessary */
1924
1925 PyObject* tag;
1926 char* p;
1927 int i;
1928
1929 /* look for namespace separator */
1930 for (i = 0; i < size; i++)
1931 if (string[i] == '}')
1932 break;
1933 if (i != size) {
1934 /* convert to universal name */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001935 tag = PyString_FromStringAndSize(NULL, size+1);
1936 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001937 p[0] = '{';
1938 memcpy(p+1, string, size);
1939 size++;
1940 } else {
1941 /* plain name; use key as tag */
1942 Py_INCREF(key);
1943 tag = key;
1944 }
1945
1946 /* decode universal name */
1947#if defined(Py_USING_UNICODE)
1948 /* inline makestring, to avoid duplicating the source string if
1949 it's not an utf-8 string */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001950 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001951 if (checkstring(p, size)) {
1952 value = PyUnicode_DecodeUTF8(p, size, "strict");
1953 Py_DECREF(tag);
1954 if (!value) {
1955 Py_DECREF(key);
1956 return NULL;
1957 }
1958 } else
1959#endif
1960 value = tag; /* use tag as is */
1961
1962 /* add to names dictionary */
1963 if (PyDict_SetItem(self->names, key, value) < 0) {
1964 Py_DECREF(key);
1965 Py_DECREF(value);
1966 return NULL;
1967 }
1968 }
1969
1970 Py_DECREF(key);
1971 return value;
1972}
1973
1974/* -------------------------------------------------------------------- */
1975/* handlers */
1976
1977static void
1978expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1979 int data_len)
1980{
1981 PyObject* key;
1982 PyObject* value;
1983 PyObject* res;
1984
1985 if (data_len < 2 || data_in[0] != '&')
1986 return;
1987
1988 key = makestring(data_in + 1, data_len - 2);
1989 if (!key)
1990 return;
1991
1992 value = PyDict_GetItem(self->entity, key);
1993
1994 if (value) {
1995 if (TreeBuilder_CheckExact(self->target))
1996 res = treebuilder_handle_data(
1997 (TreeBuilderObject*) self->target, value
1998 );
1999 else if (self->handle_data)
2000 res = PyObject_CallFunction(self->handle_data, "O", value);
2001 else
2002 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002003 Py_XDECREF(res);
2004 } else {
2005 PyErr_Format(
Trent Mickf08d6632006-06-19 23:21:25 +00002006 PyExc_SyntaxError, "undefined entity &%s;: line %ld, column %ld",
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002007 PyString_AS_STRING(key),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002008 EXPAT(GetErrorLineNumber)(self->parser),
2009 EXPAT(GetErrorColumnNumber)(self->parser)
2010 );
2011 }
2012
2013 Py_DECREF(key);
2014}
2015
2016static void
2017expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2018 const XML_Char **attrib_in)
2019{
2020 PyObject* res;
2021 PyObject* tag;
2022 PyObject* attrib;
2023 int ok;
2024
2025 /* tag name */
2026 tag = makeuniversal(self, tag_in);
2027 if (!tag)
2028 return; /* parser will look for errors */
2029
2030 /* attributes */
2031 if (attrib_in[0]) {
2032 attrib = PyDict_New();
2033 if (!attrib)
2034 return;
2035 while (attrib_in[0] && attrib_in[1]) {
2036 PyObject* key = makeuniversal(self, attrib_in[0]);
2037 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2038 if (!key || !value) {
2039 Py_XDECREF(value);
2040 Py_XDECREF(key);
2041 Py_DECREF(attrib);
2042 return;
2043 }
2044 ok = PyDict_SetItem(attrib, key, value);
2045 Py_DECREF(value);
2046 Py_DECREF(key);
2047 if (ok < 0) {
2048 Py_DECREF(attrib);
2049 return;
2050 }
2051 attrib_in += 2;
2052 }
2053 } else {
2054 Py_INCREF(Py_None);
2055 attrib = Py_None;
2056 }
2057
2058 if (TreeBuilder_CheckExact(self->target))
2059 /* shortcut */
2060 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2061 tag, attrib);
2062 else if (self->handle_start)
2063 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2064 else
2065 res = NULL;
2066
2067 Py_DECREF(tag);
2068 Py_DECREF(attrib);
2069
2070 Py_XDECREF(res);
2071}
2072
2073static void
2074expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2075 int data_len)
2076{
2077 PyObject* data;
2078 PyObject* res;
2079
2080 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002081 if (!data)
2082 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002083
2084 if (TreeBuilder_CheckExact(self->target))
2085 /* shortcut */
2086 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2087 else if (self->handle_data)
2088 res = PyObject_CallFunction(self->handle_data, "O", data);
2089 else
2090 res = NULL;
2091
2092 Py_DECREF(data);
2093
2094 Py_XDECREF(res);
2095}
2096
2097static void
2098expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2099{
2100 PyObject* tag;
2101 PyObject* res = NULL;
2102
2103 if (TreeBuilder_CheckExact(self->target))
2104 /* shortcut */
2105 /* the standard tree builder doesn't look at the end tag */
2106 res = treebuilder_handle_end(
2107 (TreeBuilderObject*) self->target, Py_None
2108 );
2109 else if (self->handle_end) {
2110 tag = makeuniversal(self, tag_in);
2111 if (tag) {
2112 res = PyObject_CallFunction(self->handle_end, "O", tag);
2113 Py_DECREF(tag);
2114 }
2115 }
2116
2117 Py_XDECREF(res);
2118}
2119
2120static void
2121expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2122 const XML_Char *uri)
2123{
2124 treebuilder_handle_namespace(
2125 (TreeBuilderObject*) self->target, 1, prefix, uri
2126 );
2127}
2128
2129static void
2130expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2131{
2132 treebuilder_handle_namespace(
2133 (TreeBuilderObject*) self->target, 0, NULL, NULL
2134 );
2135}
2136
2137static void
2138expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2139{
2140 PyObject* comment;
2141 PyObject* res;
2142
2143 if (self->handle_comment) {
2144 comment = makestring(comment_in, strlen(comment_in));
2145 if (comment) {
2146 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2147 Py_XDECREF(res);
2148 Py_DECREF(comment);
2149 }
2150 }
2151}
2152
2153static void
2154expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2155 const XML_Char* data_in)
2156{
2157 PyObject* target;
2158 PyObject* data;
2159 PyObject* res;
2160
2161 if (self->handle_pi) {
2162 target = makestring(target_in, strlen(target_in));
2163 data = makestring(data_in, strlen(data_in));
2164 if (target && data) {
2165 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2166 Py_XDECREF(res);
2167 Py_DECREF(data);
2168 Py_DECREF(target);
2169 } else {
2170 Py_XDECREF(data);
2171 Py_XDECREF(target);
2172 }
2173 }
2174}
2175
2176#if defined(Py_USING_UNICODE)
2177static int
2178expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2179 XML_Encoding *info)
2180{
2181 PyObject* u;
2182 Py_UNICODE* p;
2183 unsigned char s[256];
2184 int i;
2185
2186 memset(info, 0, sizeof(XML_Encoding));
2187
2188 for (i = 0; i < 256; i++)
2189 s[i] = i;
2190
Fredrik Lundhc3389992005-12-25 11:40:19 +00002191 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002192 if (!u)
2193 return XML_STATUS_ERROR;
2194
2195 if (PyUnicode_GET_SIZE(u) != 256) {
2196 Py_DECREF(u);
2197 return XML_STATUS_ERROR;
2198 }
2199
2200 p = PyUnicode_AS_UNICODE(u);
2201
2202 for (i = 0; i < 256; i++) {
2203 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2204 info->map[i] = p[i];
2205 else
2206 info->map[i] = -1;
2207 }
2208
2209 Py_DECREF(u);
2210
2211 return XML_STATUS_OK;
2212}
2213#endif
2214
2215/* -------------------------------------------------------------------- */
2216/* constructor and destructor */
2217
2218static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00002219xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002220{
2221 XMLParserObject* self;
2222 /* FIXME: does this need to be static? */
2223 static XML_Memory_Handling_Suite memory_handler;
2224
2225 PyObject* target = NULL;
2226 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002227 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002228 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2229 &target, &encoding))
2230 return NULL;
2231
2232#if defined(USE_PYEXPAT_CAPI)
2233 if (!expat_capi) {
2234 PyErr_SetString(
2235 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2236 );
2237 return NULL;
2238 }
2239#endif
2240
2241 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2242 if (self == NULL)
2243 return NULL;
2244
2245 self->entity = PyDict_New();
2246 if (!self->entity) {
2247 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002248 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002249 }
2250
2251 self->names = PyDict_New();
2252 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002253 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002254 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002255 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002256 }
2257
2258 memory_handler.malloc_fcn = PyObject_Malloc;
2259 memory_handler.realloc_fcn = PyObject_Realloc;
2260 memory_handler.free_fcn = PyObject_Free;
2261
2262 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2263 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002264 PyObject_Del(self->names);
2265 PyObject_Del(self->entity);
2266 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002267 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002268 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002269 }
2270
2271 /* setup target handlers */
2272 if (!target) {
2273 target = treebuilder_new();
2274 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002275 EXPAT(ParserFree)(self->parser);
2276 PyObject_Del(self->names);
2277 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002278 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002279 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002280 }
2281 } else
2282 Py_INCREF(target);
2283 self->target = target;
2284
2285 self->handle_xml = PyObject_GetAttrString(target, "xml");
2286 self->handle_start = PyObject_GetAttrString(target, "start");
2287 self->handle_data = PyObject_GetAttrString(target, "data");
2288 self->handle_end = PyObject_GetAttrString(target, "end");
2289 self->handle_comment = PyObject_GetAttrString(target, "comment");
2290 self->handle_pi = PyObject_GetAttrString(target, "pi");
2291
2292 PyErr_Clear();
2293
2294 /* configure parser */
2295 EXPAT(SetUserData)(self->parser, self);
2296 EXPAT(SetElementHandler)(
2297 self->parser,
2298 (XML_StartElementHandler) expat_start_handler,
2299 (XML_EndElementHandler) expat_end_handler
2300 );
2301 EXPAT(SetDefaultHandlerExpand)(
2302 self->parser,
2303 (XML_DefaultHandler) expat_default_handler
2304 );
2305 EXPAT(SetCharacterDataHandler)(
2306 self->parser,
2307 (XML_CharacterDataHandler) expat_data_handler
2308 );
2309 if (self->handle_comment)
2310 EXPAT(SetCommentHandler)(
2311 self->parser,
2312 (XML_CommentHandler) expat_comment_handler
2313 );
2314 if (self->handle_pi)
2315 EXPAT(SetProcessingInstructionHandler)(
2316 self->parser,
2317 (XML_ProcessingInstructionHandler) expat_pi_handler
2318 );
2319#if defined(Py_USING_UNICODE)
2320 EXPAT(SetUnknownEncodingHandler)(
2321 self->parser,
2322 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2323 );
2324#endif
2325
2326 ALLOC(sizeof(XMLParserObject), "create expatparser");
2327
2328 return (PyObject*) self;
2329}
2330
2331static void
2332xmlparser_dealloc(XMLParserObject* self)
2333{
2334 EXPAT(ParserFree)(self->parser);
2335
2336 Py_XDECREF(self->handle_pi);
2337 Py_XDECREF(self->handle_comment);
2338 Py_XDECREF(self->handle_end);
2339 Py_XDECREF(self->handle_data);
2340 Py_XDECREF(self->handle_start);
2341 Py_XDECREF(self->handle_xml);
2342
2343 Py_DECREF(self->target);
2344 Py_DECREF(self->entity);
2345 Py_DECREF(self->names);
2346
2347 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2348
2349 PyObject_Del(self);
2350}
2351
/* -------------------------------------------------------------------- */
/* methods (in alphabetical order) */
2354
2355LOCAL(PyObject*)
2356expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2357{
2358 int ok;
2359
2360 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2361
2362 if (PyErr_Occurred())
2363 return NULL;
2364
2365 if (!ok) {
2366 PyErr_Format(
Trent Mickf08d6632006-06-19 23:21:25 +00002367 PyExc_SyntaxError, "%s: line %ld, column %ld",
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002368 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2369 EXPAT(GetErrorLineNumber)(self->parser),
2370 EXPAT(GetErrorColumnNumber)(self->parser)
2371 );
2372 return NULL;
2373 }
2374
2375 Py_RETURN_NONE;
2376}
2377
2378static PyObject*
2379xmlparser_close(XMLParserObject* self, PyObject* args)
2380{
2381 /* end feeding data to parser */
2382
2383 PyObject* res;
2384 if (!PyArg_ParseTuple(args, ":close"))
2385 return NULL;
2386
2387 res = expat_parse(self, "", 0, 1);
2388
2389 if (res && TreeBuilder_CheckExact(self->target)) {
2390 Py_DECREF(res);
2391 return treebuilder_done((TreeBuilderObject*) self->target);
2392 }
2393
2394 return res;
2395}
2396
2397static PyObject*
2398xmlparser_feed(XMLParserObject* self, PyObject* args)
2399{
2400 /* feed data to parser */
2401
2402 char* data;
2403 int data_len;
2404 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2405 return NULL;
2406
2407 return expat_parse(self, data, data_len, 0);
2408}
2409
2410static PyObject*
2411xmlparser_parse(XMLParserObject* self, PyObject* args)
2412{
2413 /* (internal) parse until end of input stream */
2414
2415 PyObject* reader;
2416 PyObject* buffer;
2417 PyObject* res;
2418
2419 PyObject* fileobj;
2420 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2421 return NULL;
2422
2423 reader = PyObject_GetAttrString(fileobj, "read");
2424 if (!reader)
2425 return NULL;
2426
2427 /* read from open file object */
2428 for (;;) {
2429
2430 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2431
2432 if (!buffer) {
2433 /* read failed (e.g. due to KeyboardInterrupt) */
2434 Py_DECREF(reader);
2435 return NULL;
2436 }
2437
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002438 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002439 Py_DECREF(buffer);
2440 break;
2441 }
2442
2443 res = expat_parse(
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002444 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002445 );
2446
2447 Py_DECREF(buffer);
2448
2449 if (!res) {
2450 Py_DECREF(reader);
2451 return NULL;
2452 }
2453 Py_DECREF(res);
2454
2455 }
2456
2457 Py_DECREF(reader);
2458
2459 res = expat_parse(self, "", 0, 1);
2460
2461 if (res && TreeBuilder_CheckExact(self->target)) {
2462 Py_DECREF(res);
2463 return treebuilder_done((TreeBuilderObject*) self->target);
2464 }
2465
2466 return res;
2467}
2468
2469static PyObject*
2470xmlparser_setevents(XMLParserObject* self, PyObject* args)
2471{
2472 /* activate element event reporting */
2473
Neal Norwitzc7074382006-06-12 02:06:17 +00002474 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002475 TreeBuilderObject* target;
2476
2477 PyObject* events; /* event collector */
2478 PyObject* event_set = Py_None;
2479 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2480 &event_set))
2481 return NULL;
2482
2483 if (!TreeBuilder_CheckExact(self->target)) {
2484 PyErr_SetString(
2485 PyExc_TypeError,
2486 "event handling only supported for cElementTree.Treebuilder "
2487 "targets"
2488 );
2489 return NULL;
2490 }
2491
2492 target = (TreeBuilderObject*) self->target;
2493
2494 Py_INCREF(events);
2495 Py_XDECREF(target->events);
2496 target->events = events;
2497
2498 /* clear out existing events */
2499 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2500 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2501 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2502 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2503
2504 if (event_set == Py_None) {
2505 /* default is "end" only */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002506 target->end_event_obj = PyString_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002507 Py_RETURN_NONE;
2508 }
2509
2510 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2511 goto error;
2512
2513 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2514 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2515 char* event;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002516 if (!PyString_Check(item))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002517 goto error;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002518 event = PyString_AS_STRING(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519 if (strcmp(event, "start") == 0) {
2520 Py_INCREF(item);
2521 target->start_event_obj = item;
2522 } else if (strcmp(event, "end") == 0) {
2523 Py_INCREF(item);
2524 Py_XDECREF(target->end_event_obj);
2525 target->end_event_obj = item;
2526 } else if (strcmp(event, "start-ns") == 0) {
2527 Py_INCREF(item);
2528 Py_XDECREF(target->start_ns_event_obj);
2529 target->start_ns_event_obj = item;
2530 EXPAT(SetNamespaceDeclHandler)(
2531 self->parser,
2532 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2533 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2534 );
2535 } else if (strcmp(event, "end-ns") == 0) {
2536 Py_INCREF(item);
2537 Py_XDECREF(target->end_ns_event_obj);
2538 target->end_ns_event_obj = item;
2539 EXPAT(SetNamespaceDeclHandler)(
2540 self->parser,
2541 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2542 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2543 );
2544 } else {
2545 PyErr_Format(
2546 PyExc_ValueError,
2547 "unknown event '%s'", event
2548 );
2549 return NULL;
2550 }
2551 }
2552
2553 Py_RETURN_NONE;
2554
2555 error:
2556 PyErr_SetString(
2557 PyExc_TypeError,
2558 "invalid event tuple"
2559 );
2560 return NULL;
2561}
2562
2563static PyMethodDef xmlparser_methods[] = {
2564 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2565 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2566 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2567 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2568 {NULL, NULL}
2569};
2570
2571static PyObject*
2572xmlparser_getattr(XMLParserObject* self, char* name)
2573{
2574 PyObject* res;
2575
2576 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2577 if (res)
2578 return res;
2579
2580 PyErr_Clear();
2581
2582 if (strcmp(name, "entity") == 0)
2583 res = self->entity;
2584 else if (strcmp(name, "target") == 0)
2585 res = self->target;
2586 else if (strcmp(name, "version") == 0) {
2587 char buffer[100];
2588 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2589 XML_MINOR_VERSION, XML_MICRO_VERSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002590 return PyString_FromString(buffer);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002591 } else {
2592 PyErr_SetString(PyExc_AttributeError, name);
2593 return NULL;
2594 }
2595
2596 Py_INCREF(res);
2597 return res;
2598}
2599
2600statichere PyTypeObject XMLParser_Type = {
2601 PyObject_HEAD_INIT(NULL)
2602 0, "XMLParser", sizeof(XMLParserObject), 0,
2603 /* methods */
2604 (destructor)xmlparser_dealloc, /* tp_dealloc */
2605 0, /* tp_print */
2606 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2607};
2608
2609#endif
2610
/* ==================================================================== */
/* python module interface */
2613
2614static PyMethodDef _functions[] = {
2615 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2616 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2617 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2618#if defined(USE_EXPAT)
2619 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2620 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2621#endif
2622 {NULL, NULL}
2623};
2624
2625DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002626init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002627{
2628 PyObject* m;
2629 PyObject* g;
2630 char* bootstrap;
2631#if defined(USE_PYEXPAT_CAPI)
2632 struct PyExpat_CAPI* capi;
2633#endif
2634
2635 /* Patch object type */
Christian Heimese93237d2007-12-19 02:37:44 +00002636 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002637#if defined(USE_EXPAT)
Christian Heimese93237d2007-12-19 02:37:44 +00002638 Py_TYPE(&XMLParser_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002639#endif
2640
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002641 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002642 if (!m)
2643 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002644
2645 /* python glue code */
2646
2647 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002648 if (!g)
2649 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002650
2651 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2652
2653 bootstrap = (
2654
2655#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2656 "from __future__ import generators\n" /* enable yield under 2.2 */
2657#endif
2658
2659 "from copy import copy, deepcopy\n"
2660
2661 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002662 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663 "except ImportError:\n"
2664 " import ElementTree\n"
2665 "ET = ElementTree\n"
2666 "del ElementTree\n"
2667
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002668 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002669
2670 "try:\n" /* check if copy works as is */
2671 " copy(cElementTree.Element('x'))\n"
2672 "except:\n"
2673 " def copyelement(elem):\n"
2674 " return elem\n"
2675
2676 "def Comment(text=None):\n" /* public */
2677 " element = cElementTree.Element(ET.Comment)\n"
2678 " element.text = text\n"
2679 " return element\n"
2680 "cElementTree.Comment = Comment\n"
2681
2682 "class ElementTree(ET.ElementTree):\n" /* public */
2683 " def parse(self, source, parser=None):\n"
2684 " if not hasattr(source, 'read'):\n"
2685 " source = open(source, 'rb')\n"
2686 " if parser is not None:\n"
2687 " while 1:\n"
2688 " data = source.read(65536)\n"
2689 " if not data:\n"
2690 " break\n"
2691 " parser.feed(data)\n"
2692 " self._root = parser.close()\n"
2693 " else:\n"
2694 " parser = cElementTree.XMLParser()\n"
2695 " self._root = parser._parse(source)\n"
2696 " return self._root\n"
2697 "cElementTree.ElementTree = ElementTree\n"
2698
2699 "def getiterator(node, tag=None):\n" /* helper */
2700 " if tag == '*':\n"
2701 " tag = None\n"
2702#if (PY_VERSION_HEX < 0x02020000)
2703 " nodes = []\n" /* 2.1 doesn't have yield */
2704 " if tag is None or node.tag == tag:\n"
2705 " nodes.append(node)\n"
2706 " for node in node:\n"
2707 " nodes.extend(getiterator(node, tag))\n"
2708 " return nodes\n"
2709#else
2710 " if tag is None or node.tag == tag:\n"
2711 " yield node\n"
2712 " for node in node:\n"
2713 " for node in getiterator(node, tag):\n"
2714 " yield node\n"
2715#endif
2716
2717 "def parse(source, parser=None):\n" /* public */
2718 " tree = ElementTree()\n"
2719 " tree.parse(source, parser)\n"
2720 " return tree\n"
2721 "cElementTree.parse = parse\n"
2722
2723#if (PY_VERSION_HEX < 0x02020000)
2724 "if hasattr(ET, 'iterparse'):\n"
2725 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2726#else
2727 "class iterparse(object):\n"
2728 " root = None\n"
2729 " def __init__(self, file, events=None):\n"
2730 " if not hasattr(file, 'read'):\n"
2731 " file = open(file, 'rb')\n"
2732 " self._file = file\n"
2733 " self._events = events\n"
2734 " def __iter__(self):\n"
2735 " events = []\n"
2736 " b = cElementTree.TreeBuilder()\n"
2737 " p = cElementTree.XMLParser(b)\n"
2738 " p._setevents(events, self._events)\n"
2739 " while 1:\n"
2740 " data = self._file.read(16384)\n"
2741 " if not data:\n"
2742 " break\n"
2743 " p.feed(data)\n"
2744 " for event in events:\n"
2745 " yield event\n"
2746 " del events[:]\n"
2747 " root = p.close()\n"
2748 " for event in events:\n"
2749 " yield event\n"
2750 " self.root = root\n"
2751 "cElementTree.iterparse = iterparse\n"
2752#endif
2753
2754 "def PI(target, text=None):\n" /* public */
2755 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2756 " element.text = target\n"
2757 " if text:\n"
2758 " element.text = element.text + ' ' + text\n"
2759 " return element\n"
2760
2761 " elem = cElementTree.Element(ET.PI)\n"
2762 " elem.text = text\n"
2763 " return elem\n"
2764 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2765
2766 "def XML(text):\n" /* public */
2767 " parser = cElementTree.XMLParser()\n"
2768 " parser.feed(text)\n"
2769 " return parser.close()\n"
2770 "cElementTree.XML = cElementTree.fromstring = XML\n"
2771
2772 "def XMLID(text):\n" /* public */
2773 " tree = XML(text)\n"
2774 " ids = {}\n"
2775 " for elem in tree.getiterator():\n"
2776 " id = elem.get('id')\n"
2777 " if id:\n"
2778 " ids[id] = elem\n"
2779 " return tree, ids\n"
2780 "cElementTree.XMLID = XMLID\n"
2781
2782 "cElementTree.dump = ET.dump\n"
2783 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2784 "cElementTree.iselement = ET.iselement\n"
2785 "cElementTree.QName = ET.QName\n"
2786 "cElementTree.tostring = ET.tostring\n"
2787 "cElementTree.VERSION = '" VERSION "'\n"
2788 "cElementTree.__version__ = '" VERSION "'\n"
2789 "cElementTree.XMLParserError = SyntaxError\n"
2790
2791 );
2792
2793 PyRun_String(bootstrap, Py_file_input, g, NULL);
2794
2795 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2796
2797 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2798 if (elementtree_copyelement_obj) {
2799 /* reduce hack needed; enable reduce method */
2800 PyMethodDef* mp;
2801 for (mp = element_methods; mp->ml_name; mp++)
2802 if (mp->ml_meth == (PyCFunction) element_reduce) {
2803 mp->ml_name = "__reduce__";
2804 break;
2805 }
2806 } else
2807 PyErr_Clear();
2808 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2809 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2810
2811#if defined(USE_PYEXPAT_CAPI)
2812 /* link against pyexpat, if possible */
2813 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2814 if (capi &&
2815 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2816 capi->size <= sizeof(*expat_capi) &&
2817 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2818 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2819 capi->MICRO_VERSION == XML_MICRO_VERSION)
2820 expat_capi = capi;
2821 else
2822 expat_capi = NULL;
2823#endif
2824
2825}