blob: af813bd54bd0b3a54fa0dbee711aa7ffd21e6771 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
3 * $Id: /work/modules/celementtree/cElementTree.c 1128 2005-12-16T21:57:13.668520Z Fredrik $
4 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
36 *
37 * Copyright (c) 1999-2005 by Secret Labs AB. All rights reserved.
38 * Copyright (c) 1999-2005 by Fredrik Lundh.
39 *
40 * info@pythonware.com
41 * http://www.pythonware.com
42 */
43
Fredrik Lundh6d52b552005-12-16 22:06:43 +000044/* Licensed to PSF under a Contributor Agreement. */
45/* See http://www.python.org/2.4/license for licensing details. */
46
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000047#include "Python.h"
48
49#define VERSION "1.0.5"
50
51/* -------------------------------------------------------------------- */
52/* configuration */
53
54/* Leave defined to include the expat-based XMLParser type */
55#define USE_EXPAT
56
/* Define to make all expat calls via pyexpat's embedded expat library */
58/* #define USE_PYEXPAT_CAPI */
59
60/* An element can hold this many children without extra memory
61 allocations. */
62#define STATIC_CHILDREN 4
63
/* For best performance, choose a value so that 80-90% of all nodes
65 have no more than the given number of children. Set this to zero
66 to minimize the size of the element structure itself (this only
67 helps if you have lots of leaf nodes with attributes). */
68
69/* Also note that pymalloc always allocates blocks in multiples of
70 eight bytes. For the current version of cElementTree, this means
71 that the number of children should be an even number, at least on
72 32-bit platforms. */
73
74/* -------------------------------------------------------------------- */
75
76#if 0
77static int memory = 0;
78#define ALLOC(size, comment)\
79do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
80#define RELEASE(size, comment)\
81do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
82#else
83#define ALLOC(size, comment)
84#define RELEASE(size, comment)
85#endif
86
87/* compiler tweaks */
88#if defined(_MSC_VER)
89#define LOCAL(type) static __inline type __fastcall
90#else
91#define LOCAL(type) static type
92#endif
93
94/* compatibility macros */
95#if (PY_VERSION_HEX < 0x02040000)
96#define PyDict_CheckExact PyDict_Check
97#if (PY_VERSION_HEX < 0x02020000)
98#define PyList_CheckExact PyList_Check
99#define PyString_CheckExact PyString_Check
100#if (PY_VERSION_HEX >= 0x01060000)
101#define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
102#endif
103#endif
104#endif
105
106#if (PY_VERSION_HEX >= 0x02050000)
107#define PY_CONST const /* 2.5 adds const to some API:s */
108#else
109#define PY_CONST
110#endif
111
112#if !defined(Py_RETURN_NONE)
113#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
114#endif
115
116/* macros used to store 'join' flags in string object pointers. note
117 that all use of text and tail as object pointers must be wrapped in
118 JOIN_OBJ. see comments in the ElementObject definition for more
119 info. */
120#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
121#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
122#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
123
124/* glue functions (see the init function for details) */
125static PyObject* elementtree_copyelement_obj;
126static PyObject* elementtree_deepcopy_obj;
127static PyObject* elementtree_getiterator_obj;
128static PyObject* elementpath_obj;
129
130/* helpers */
131
132LOCAL(PyObject*)
133deepcopy(PyObject* object, PyObject* memo)
134{
135 /* do a deep copy of the given object */
136
137 PyObject* args;
138 PyObject* result;
139
140 if (!elementtree_deepcopy_obj) {
141 PyErr_SetString(
142 PyExc_RuntimeError,
143 "deepcopy helper not found"
144 );
145 return NULL;
146 }
147
148 args = PyTuple_New(2);
149 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
150 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
151
152 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
153
154 Py_DECREF(args);
155
156 return result;
157}
158
159LOCAL(PyObject*)
160list_join(PyObject* list)
161{
162 /* join list elements (destroying the list in the process) */
163
164 PyObject* joiner;
165 PyObject* function;
166 PyObject* args;
167 PyObject* result;
168
169 switch (PyList_GET_SIZE(list)) {
170 case 0:
171 Py_DECREF(list);
172 return PyString_FromString("");
173 case 1:
174 result = PyList_GET_ITEM(list, 0);
175 Py_INCREF(result);
176 Py_DECREF(list);
177 return result;
178 }
179
180 /* two or more elements: slice out a suitable separator from the
181 first member, and use that to join the entire list */
182
183 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
184 if (!joiner)
185 return NULL;
186
187 function = PyObject_GetAttrString(joiner, "join");
188 if (!function) {
189 Py_DECREF(joiner);
190 return NULL;
191 }
192
193 args = PyTuple_New(1);
194 PyTuple_SET_ITEM(args, 0, list);
195
196 result = PyObject_CallObject(function, args);
197
198 Py_DECREF(args); /* also removes list */
199 Py_DECREF(function);
200 Py_DECREF(joiner);
201
202 return result;
203}
204
205#if (PY_VERSION_HEX < 0x02020000)
206LOCAL(int)
207PyDict_Update(PyObject* dict, PyObject* other)
208{
209 /* PyDict_Update emulation for 2.1 and earlier */
210
211 PyObject* res;
212
213 res = PyObject_CallMethod(dict, "update", "O", other);
214 if (!res)
215 return -1;
216
217 Py_DECREF(res);
218 return 0;
219}
220#endif
221
222/* -------------------------------------------------------------------- */
223/* the element type */
224
225typedef struct {
226
227 /* attributes (a dictionary object), or None if no attributes */
228 PyObject* attrib;
229
230 /* child elements */
231 int length; /* actual number of items */
232 int allocated; /* allocated items */
233
234 /* this either points to _children or to a malloced buffer */
235 PyObject* *children;
236
237 PyObject* _children[STATIC_CHILDREN];
238
239} ElementObjectExtra;
240
241typedef struct {
242 PyObject_HEAD
243
244 /* element tag (a string). */
245 PyObject* tag;
246
247 /* text before first child. note that this is a tagged pointer;
248 use JOIN_OBJ to get the object pointer. the join flag is used
249 to distinguish lists created by the tree builder from lists
250 assigned to the attribute by application code; the former
251 should be joined before being returned to the user, the latter
252 should be left intact. */
253 PyObject* text;
254
255 /* text after this element, in parent. note that this is a tagged
256 pointer; use JOIN_OBJ to get the object pointer. */
257 PyObject* tail;
258
259 ElementObjectExtra* extra;
260
261} ElementObject;
262
263staticforward PyTypeObject Element_Type;
264
265#define Element_CheckExact(op) ((op)->ob_type == &Element_Type)
266
267/* -------------------------------------------------------------------- */
268/* element constructor and destructor */
269
270LOCAL(int)
271element_new_extra(ElementObject* self, PyObject* attrib)
272{
273 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
274 if (!self->extra)
275 return -1;
276
277 if (!attrib)
278 attrib = Py_None;
279
280 Py_INCREF(attrib);
281 self->extra->attrib = attrib;
282
283 self->extra->length = 0;
284 self->extra->allocated = STATIC_CHILDREN;
285 self->extra->children = self->extra->_children;
286
287 return 0;
288}
289
290LOCAL(void)
291element_dealloc_extra(ElementObject* self)
292{
293 int i;
294
295 Py_DECREF(self->extra->attrib);
296
297 for (i = 0; i < self->extra->length; i++)
298 Py_DECREF(self->extra->children[i]);
299
300 if (self->extra->children != self->extra->_children)
301 PyObject_Free(self->extra->children);
302
303 PyObject_Free(self->extra);
304}
305
306LOCAL(PyObject*)
307element_new(PyObject* tag, PyObject* attrib)
308{
309 ElementObject* self;
310
311 self = PyObject_New(ElementObject, &Element_Type);
312 if (self == NULL)
313 return NULL;
314
315 /* use None for empty dictionaries */
316 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
317 attrib = Py_None;
318
319 self->extra = NULL;
320
321 if (attrib != Py_None) {
322
323 if (element_new_extra(self, attrib) < 0)
324 return NULL;
325
326 self->extra->length = 0;
327 self->extra->allocated = STATIC_CHILDREN;
328 self->extra->children = self->extra->_children;
329
330 }
331
332 Py_INCREF(tag);
333 self->tag = tag;
334
335 Py_INCREF(Py_None);
336 self->text = Py_None;
337
338 Py_INCREF(Py_None);
339 self->tail = Py_None;
340
341 ALLOC(sizeof(ElementObject), "create element");
342
343 return (PyObject*) self;
344}
345
346LOCAL(int)
347element_resize(ElementObject* self, int extra)
348{
349 int size;
350 PyObject* *children;
351
352 /* make sure self->children can hold the given number of extra
353 elements. set an exception and return -1 if allocation failed */
354
355 if (!self->extra)
356 element_new_extra(self, NULL);
357
358 size = self->extra->length + extra;
359
360 if (size > self->extra->allocated) {
361 /* use Python 2.4's list growth strategy */
362 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
363 if (self->extra->children != self->extra->_children) {
364 children = PyObject_Realloc(self->extra->children,
365 size * sizeof(PyObject*));
366 if (!children)
367 goto nomemory;
368 } else {
369 children = PyObject_Malloc(size * sizeof(PyObject*));
370 if (!children)
371 goto nomemory;
372 /* copy existing children from static area to malloc buffer */
373 memcpy(children, self->extra->children,
374 self->extra->length * sizeof(PyObject*));
375 }
376 self->extra->children = children;
377 self->extra->allocated = size;
378 }
379
380 return 0;
381
382 nomemory:
383 PyErr_NoMemory();
384 return -1;
385}
386
387LOCAL(int)
388element_add_subelement(ElementObject* self, PyObject* element)
389{
390 /* add a child element to a parent */
391
392 if (element_resize(self, 1) < 0)
393 return -1;
394
395 Py_INCREF(element);
396 self->extra->children[self->extra->length] = element;
397
398 self->extra->length++;
399
400 return 0;
401}
402
403LOCAL(PyObject*)
404element_get_attrib(ElementObject* self)
405{
406 /* return borrowed reference to attrib dictionary */
407 /* note: this function assumes that the extra section exists */
408
409 PyObject* res = self->extra->attrib;
410
411 if (res == Py_None) {
412 /* create missing dictionary */
413 res = PyDict_New();
414 if (!res)
415 return NULL;
416 self->extra->attrib = res;
417 }
418
419 return res;
420}
421
422LOCAL(PyObject*)
423element_get_text(ElementObject* self)
424{
425 /* return borrowed reference to text attribute */
426
427 PyObject* res = self->text;
428
429 if (JOIN_GET(res)) {
430 res = JOIN_OBJ(res);
431 if (PyList_CheckExact(res)) {
432 res = list_join(res);
433 if (!res)
434 return NULL;
435 self->text = res;
436 }
437 }
438
439 return res;
440}
441
442LOCAL(PyObject*)
443element_get_tail(ElementObject* self)
444{
445 /* return borrowed reference to text attribute */
446
447 PyObject* res = self->tail;
448
449 if (JOIN_GET(res)) {
450 res = JOIN_OBJ(res);
451 if (PyList_CheckExact(res)) {
452 res = list_join(res);
453 if (!res)
454 return NULL;
455 self->tail = res;
456 }
457 }
458
459 return res;
460}
461
462static PyObject*
463element(PyObject* self, PyObject* args, PyObject* kw)
464{
465 PyObject* elem;
466
467 PyObject* tag;
468 PyObject* attrib = NULL;
469 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
470 &PyDict_Type, &attrib))
471 return NULL;
472
473 if (attrib || kw) {
474 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
475 if (!attrib)
476 return NULL;
477 if (kw)
478 PyDict_Update(attrib, kw);
479 } else {
480 Py_INCREF(Py_None);
481 attrib = Py_None;
482 }
483
484 elem = element_new(tag, attrib);
485
486 Py_DECREF(attrib);
487
488 return elem;
489}
490
491static PyObject*
492subelement(PyObject* self, PyObject* args, PyObject* kw)
493{
494 PyObject* elem;
495
496 ElementObject* parent;
497 PyObject* tag;
498 PyObject* attrib = NULL;
499 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
500 &Element_Type, &parent, &tag,
501 &PyDict_Type, &attrib))
502 return NULL;
503
504 if (attrib || kw) {
505 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
506 if (!attrib)
507 return NULL;
508 if (kw)
509 PyDict_Update(attrib, kw);
510 } else {
511 Py_INCREF(Py_None);
512 attrib = Py_None;
513 }
514
515 elem = element_new(tag, attrib);
516
517 Py_DECREF(attrib);
518
519 if (element_add_subelement(parent, elem) < 0)
520 return NULL;
521
522 return elem;
523}
524
525static void
526element_dealloc(ElementObject* self)
527{
528 if (self->extra)
529 element_dealloc_extra(self);
530
531 /* discard attributes */
532 Py_DECREF(self->tag);
533 Py_DECREF(JOIN_OBJ(self->text));
534 Py_DECREF(JOIN_OBJ(self->tail));
535
536 RELEASE(sizeof(ElementObject), "destroy element");
537
538 PyObject_Del(self);
539}
540
541/* -------------------------------------------------------------------- */
542/* methods (in alphabetical order) */
543
544static PyObject*
545element_append(ElementObject* self, PyObject* args)
546{
547 PyObject* element;
548 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
549 return NULL;
550
551 if (element_add_subelement(self, element) < 0)
552 return NULL;
553
554 Py_RETURN_NONE;
555}
556
557static PyObject*
558element_clear(ElementObject* self, PyObject* args)
559{
560 if (!PyArg_ParseTuple(args, ":clear"))
561 return NULL;
562
563 if (self->extra) {
564 element_dealloc_extra(self);
565 self->extra = NULL;
566 }
567
568 Py_INCREF(Py_None);
569 Py_DECREF(JOIN_OBJ(self->text));
570 self->text = Py_None;
571
572 Py_INCREF(Py_None);
573 Py_DECREF(JOIN_OBJ(self->tail));
574 self->tail = Py_None;
575
576 Py_RETURN_NONE;
577}
578
579static PyObject*
580element_copy(ElementObject* self, PyObject* args)
581{
582 int i;
583 ElementObject* element;
584
585 if (!PyArg_ParseTuple(args, ":__copy__"))
586 return NULL;
587
588 element = (ElementObject*) element_new(
589 self->tag, (self->extra) ? self->extra->attrib : Py_None
590 );
591 if (!element)
592 return NULL;
593
594 Py_DECREF(JOIN_OBJ(element->text));
595 element->text = self->text;
596 Py_INCREF(JOIN_OBJ(element->text));
597
598 Py_DECREF(JOIN_OBJ(element->tail));
599 element->tail = self->tail;
600 Py_INCREF(JOIN_OBJ(element->tail));
601
602 if (self->extra) {
603
604 if (element_resize(element, self->extra->length) < 0)
605 return NULL;
606
607 for (i = 0; i < self->extra->length; i++) {
608 Py_INCREF(self->extra->children[i]);
609 element->extra->children[i] = self->extra->children[i];
610 }
611
612 element->extra->length = self->extra->length;
613
614 }
615
616 return (PyObject*) element;
617}
618
619static PyObject*
620element_deepcopy(ElementObject* self, PyObject* args)
621{
622 int i;
623 ElementObject* element;
624 PyObject* tag;
625 PyObject* attrib;
626 PyObject* text;
627 PyObject* tail;
628 PyObject* id;
629
630 PyObject* memo;
631 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
632 return NULL;
633
634 tag = deepcopy(self->tag, memo);
635 if (!tag)
636 return NULL;
637
638 if (self->extra) {
639 attrib = deepcopy(self->extra->attrib, memo);
640 if (!attrib) {
641 Py_DECREF(tag);
642 return NULL;
643 }
644 } else {
645 Py_INCREF(Py_None);
646 attrib = Py_None;
647 }
648
649 element = (ElementObject*) element_new(tag, attrib);
650
651 Py_DECREF(tag);
652 Py_DECREF(attrib);
653
654 if (!element)
655 return NULL;
656
657 text = deepcopy(JOIN_OBJ(self->text), memo);
658 if (!text)
659 goto error;
660 Py_DECREF(element->text);
661 element->text = JOIN_SET(text, JOIN_GET(self->text));
662
663 tail = deepcopy(JOIN_OBJ(self->tail), memo);
664 if (!tail)
665 goto error;
666 Py_DECREF(element->tail);
667 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
668
669 if (self->extra) {
670
671 if (element_resize(element, self->extra->length) < 0)
672 goto error;
673
674 for (i = 0; i < self->extra->length; i++) {
675 PyObject* child = deepcopy(self->extra->children[i], memo);
676 if (!child) {
677 element->extra->length = i;
678 goto error;
679 }
680 element->extra->children[i] = child;
681 }
682
683 element->extra->length = self->extra->length;
684
685 }
686
687 /* add object to memo dictionary (so deepcopy won't visit it again) */
688 id = PyInt_FromLong((Py_uintptr_t) self);
689
690 i = PyDict_SetItem(memo, id, (PyObject*) element);
691
692 Py_DECREF(id);
693
694 if (i < 0)
695 goto error;
696
697 return (PyObject*) element;
698
699 error:
700 Py_DECREF(element);
701 return NULL;
702}
703
704LOCAL(int)
705checkpath(PyObject* tag)
706{
707 int i, check = 1;
708
709 /* check if a tag contains an xpath character */
710
711#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
712
713#if defined(Py_USING_UNICODE)
714 if (PyUnicode_Check(tag)) {
715 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
716 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
717 if (p[i] == '{')
718 check = 0;
719 else if (p[i] == '}')
720 check = 1;
721 else if (check && PATHCHAR(p[i]))
722 return 1;
723 }
724 return 0;
725 }
726#endif
727 if (PyString_Check(tag)) {
728 char *p = PyString_AS_STRING(tag);
729 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
730 if (p[i] == '{')
731 check = 0;
732 else if (p[i] == '}')
733 check = 1;
734 else if (check && PATHCHAR(p[i]))
735 return 1;
736 }
737 return 0;
738 }
739
740 return 1; /* unknown type; might be path expression */
741}
742
743static PyObject*
744element_find(ElementObject* self, PyObject* args)
745{
746 int i;
747
748 PyObject* tag;
749 if (!PyArg_ParseTuple(args, "O:find", &tag))
750 return NULL;
751
752 if (checkpath(tag))
753 return PyObject_CallMethod(
754 elementpath_obj, "find", "OO", self, tag
755 );
756
757 if (!self->extra)
758 Py_RETURN_NONE;
759
760 for (i = 0; i < self->extra->length; i++) {
761 PyObject* item = self->extra->children[i];
762 if (Element_CheckExact(item) &&
763 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
764 Py_INCREF(item);
765 return item;
766 }
767 }
768
769 Py_RETURN_NONE;
770}
771
772static PyObject*
773element_findtext(ElementObject* self, PyObject* args)
774{
775 int i;
776
777 PyObject* tag;
778 PyObject* default_value = Py_None;
779 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
780 return NULL;
781
782 if (checkpath(tag))
783 return PyObject_CallMethod(
784 elementpath_obj, "findtext", "OOO", self, tag, default_value
785 );
786
787 if (!self->extra) {
788 Py_INCREF(default_value);
789 return default_value;
790 }
791
792 for (i = 0; i < self->extra->length; i++) {
793 ElementObject* item = (ElementObject*) self->extra->children[i];
794 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
795 PyObject* text = element_get_text(item);
796 if (text == Py_None)
797 return PyString_FromString("");
798 Py_INCREF(text);
799 return text;
800 }
801 }
802
803 Py_INCREF(default_value);
804 return default_value;
805}
806
807static PyObject*
808element_findall(ElementObject* self, PyObject* args)
809{
810 int i;
811 PyObject* out;
812
813 PyObject* tag;
814 if (!PyArg_ParseTuple(args, "O:findall", &tag))
815 return NULL;
816
817 if (checkpath(tag))
818 return PyObject_CallMethod(
819 elementpath_obj, "findall", "OO", self, tag
820 );
821
822 out = PyList_New(0);
823 if (!out)
824 return NULL;
825
826 if (!self->extra)
827 return out;
828
829 for (i = 0; i < self->extra->length; i++) {
830 PyObject* item = self->extra->children[i];
831 if (Element_CheckExact(item) &&
832 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
833 if (PyList_Append(out, item) < 0) {
834 Py_DECREF(out);
835 return NULL;
836 }
837 }
838 }
839
840 return out;
841}
842
843static PyObject*
844element_get(ElementObject* self, PyObject* args)
845{
846 PyObject* value;
847
848 PyObject* key;
849 PyObject* default_value = Py_None;
850 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
851 return NULL;
852
853 if (!self->extra || self->extra->attrib == Py_None)
854 value = default_value;
855 else {
856 value = PyDict_GetItem(self->extra->attrib, key);
857 if (!value)
858 value = default_value;
859 }
860
861 Py_INCREF(value);
862 return value;
863}
864
865static PyObject*
866element_getchildren(ElementObject* self, PyObject* args)
867{
868 int i;
869 PyObject* list;
870
871 if (!PyArg_ParseTuple(args, ":getchildren"))
872 return NULL;
873
874 if (!self->extra)
875 return PyList_New(0);
876
877 list = PyList_New(self->extra->length);
878 if (!list)
879 return NULL;
880
881 for (i = 0; i < self->extra->length; i++) {
882 PyObject* item = self->extra->children[i];
883 Py_INCREF(item);
884 PyList_SET_ITEM(list, i, item);
885 }
886
887 return list;
888}
889
890static PyObject*
891element_getiterator(ElementObject* self, PyObject* args)
892{
893 PyObject* result;
894
895 PyObject* tag = Py_None;
896 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
897 return NULL;
898
899 if (!elementtree_getiterator_obj) {
900 PyErr_SetString(
901 PyExc_RuntimeError,
902 "getiterator helper not found"
903 );
904 return NULL;
905 }
906
907 args = PyTuple_New(2);
908 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
909 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
910
911 result = PyObject_CallObject(elementtree_getiterator_obj, args);
912
913 Py_DECREF(args);
914
915 return result;
916}
917
918static PyObject*
919element_getitem(ElementObject* self, int index)
920{
921 if (!self->extra || index < 0 || index >= self->extra->length) {
922 PyErr_SetString(
923 PyExc_IndexError,
924 "child index out of range"
925 );
926 return NULL;
927 }
928
929 Py_INCREF(self->extra->children[index]);
930 return self->extra->children[index];
931}
932
933static PyObject*
934element_getslice(ElementObject* self, int start, int end)
935{
936 int i;
937 PyObject* list;
938
939 if (!self->extra)
940 return PyList_New(0);
941
942 /* standard clamping */
943 if (start < 0)
944 start = 0;
945 if (end < 0)
946 end = 0;
947 if (end > self->extra->length)
948 end = self->extra->length;
949 if (start > end)
950 start = end;
951
952 list = PyList_New(end - start);
953 if (!list)
954 return NULL;
955
956 for (i = start; i < end; i++) {
957 PyObject* item = self->extra->children[i];
958 Py_INCREF(item);
959 PyList_SET_ITEM(list, i - start, item);
960 }
961
962 return list;
963}
964
965static PyObject*
966element_insert(ElementObject* self, PyObject* args)
967{
968 int i;
969
970 int index;
971 PyObject* element;
972 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
973 &Element_Type, &element))
974 return NULL;
975
976 if (!self->extra)
977 element_new_extra(self, NULL);
978
979 if (index < 0)
980 index = 0;
981 if (index > self->extra->length)
982 index = self->extra->length;
983
984 if (element_resize(self, 1) < 0)
985 return NULL;
986
987 for (i = self->extra->length; i > index; i--)
988 self->extra->children[i] = self->extra->children[i-1];
989
990 Py_INCREF(element);
991 self->extra->children[index] = element;
992
993 self->extra->length++;
994
995 Py_RETURN_NONE;
996}
997
998static PyObject*
999element_items(ElementObject* self, PyObject* args)
1000{
1001 if (!PyArg_ParseTuple(args, ":items"))
1002 return NULL;
1003
1004 if (!self->extra || self->extra->attrib == Py_None)
1005 return PyList_New(0);
1006
1007 return PyDict_Items(self->extra->attrib);
1008}
1009
1010static PyObject*
1011element_keys(ElementObject* self, PyObject* args)
1012{
1013 if (!PyArg_ParseTuple(args, ":keys"))
1014 return NULL;
1015
1016 if (!self->extra || self->extra->attrib == Py_None)
1017 return PyList_New(0);
1018
1019 return PyDict_Keys(self->extra->attrib);
1020}
1021
1022static int
1023element_length(ElementObject* self)
1024{
1025 if (!self->extra)
1026 return 0;
1027
1028 return self->extra->length;
1029}
1030
1031static PyObject*
1032element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1033{
1034 PyObject* elem;
1035
1036 PyObject* tag;
1037 PyObject* attrib;
1038 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1039 return NULL;
1040
1041 attrib = PyDict_Copy(attrib);
1042 if (!attrib)
1043 return NULL;
1044
1045 elem = element_new(tag, attrib);
1046
1047 Py_DECREF(attrib);
1048
1049 return elem;
1050}
1051
1052static PyObject*
1053element_reduce(ElementObject* self, PyObject* args)
1054{
1055 if (!PyArg_ParseTuple(args, ":__reduce__"))
1056 return NULL;
1057
1058 /* Hack alert: This method is used to work around a __copy__
1059 problem on certain 2.3 and 2.4 versions. To save time and
1060 simplify the code, we create the copy in here, and use a dummy
1061 copyelement helper to trick the copy module into doing the
1062 right thing. */
1063
1064 if (!elementtree_copyelement_obj) {
1065 PyErr_SetString(
1066 PyExc_RuntimeError,
1067 "copyelement helper not found"
1068 );
1069 return NULL;
1070 }
1071
1072 return Py_BuildValue(
1073 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1074 );
1075}
1076
1077static PyObject*
1078element_remove(ElementObject* self, PyObject* args)
1079{
1080 int i;
1081
1082 PyObject* element;
1083 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1084 return NULL;
1085
1086 if (!self->extra) {
1087 /* element has no children, so raise exception */
1088 PyErr_SetString(
1089 PyExc_ValueError,
1090 "list.remove(x): x not in list"
1091 );
1092 return NULL;
1093 }
1094
1095 for (i = 0; i < self->extra->length; i++) {
1096 if (self->extra->children[i] == element)
1097 break;
1098 if (PyObject_Compare(self->extra->children[i], element) == 0)
1099 break;
1100 }
1101
1102 if (i == self->extra->length) {
1103 /* element is not in children, so raise exception */
1104 PyErr_SetString(
1105 PyExc_ValueError,
1106 "list.remove(x): x not in list"
1107 );
1108 return NULL;
1109 }
1110
1111 Py_DECREF(self->extra->children[i]);
1112
1113 self->extra->length--;
1114
1115 for (; i < self->extra->length; i++)
1116 self->extra->children[i] = self->extra->children[i+1];
1117
1118 Py_RETURN_NONE;
1119}
1120
1121static PyObject*
1122element_repr(ElementObject* self)
1123{
1124 PyObject* repr;
1125 char buffer[100];
1126
1127 repr = PyString_FromString("<Element ");
1128
1129 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag));
1130
1131 sprintf(buffer, " at %p>", self);
1132 PyString_ConcatAndDel(&repr, PyString_FromString(buffer));
1133
1134 return repr;
1135}
1136
1137static PyObject*
1138element_set(ElementObject* self, PyObject* args)
1139{
1140 PyObject* attrib;
1141
1142 PyObject* key;
1143 PyObject* value;
1144 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1145 return NULL;
1146
1147 if (!self->extra)
1148 element_new_extra(self, NULL);
1149
1150 attrib = element_get_attrib(self);
1151 if (!attrib)
1152 return NULL;
1153
1154 if (PyDict_SetItem(attrib, key, value) < 0)
1155 return NULL;
1156
1157 Py_RETURN_NONE;
1158}
1159
1160static int
1161element_setslice(ElementObject* self, int start, int end, PyObject* item)
1162{
1163 int i, new, old;
1164 PyObject* recycle = NULL;
1165
1166 if (!self->extra)
1167 element_new_extra(self, NULL);
1168
1169 /* standard clamping */
1170 if (start < 0)
1171 start = 0;
1172 if (end < 0)
1173 end = 0;
1174 if (end > self->extra->length)
1175 end = self->extra->length;
1176 if (start > end)
1177 start = end;
1178
1179 old = end - start;
1180
1181 if (item == NULL)
1182 new = 0;
1183 else if (PyList_CheckExact(item)) {
1184 new = PyList_GET_SIZE(item);
1185 } else {
1186 /* FIXME: support arbitrary sequences? */
1187 PyErr_Format(
1188 PyExc_TypeError,
1189 "expected list, not \"%.200s\"", item->ob_type->tp_name
1190 );
1191 return -1;
1192 }
1193
1194 if (old > 0) {
1195 /* to avoid recursive calls to this method (via decref), move
1196 old items to the recycle bin here, and get rid of them when
1197 we're done modifying the element */
1198 recycle = PyList_New(old);
1199 for (i = 0; i < old; i++)
1200 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1201 }
1202
1203 if (new < old) {
1204 /* delete slice */
1205 for (i = end; i < self->extra->length; i++)
1206 self->extra->children[i + new - old] = self->extra->children[i];
1207 } else if (new > old) {
1208 /* insert slice */
1209 if (element_resize(self, new - old) < 0)
1210 return -1;
1211 for (i = self->extra->length-1; i >= end; i--)
1212 self->extra->children[i + new - old] = self->extra->children[i];
1213 }
1214
1215 /* replace the slice */
1216 for (i = 0; i < new; i++) {
1217 PyObject* element = PyList_GET_ITEM(item, i);
1218 Py_INCREF(element);
1219 self->extra->children[i + start] = element;
1220 }
1221
1222 self->extra->length += new - old;
1223
1224 /* discard the recycle bin, and everything in it */
1225 Py_XDECREF(recycle);
1226
1227 return 0;
1228}
1229
1230static int
1231element_setitem(ElementObject* self, int index, PyObject* item)
1232{
1233 int i;
1234 PyObject* old;
1235
1236 if (!self->extra || index < 0 || index >= self->extra->length) {
1237 PyErr_SetString(
1238 PyExc_IndexError,
1239 "child assignment index out of range");
1240 return -1;
1241 }
1242
1243 old = self->extra->children[index];
1244
1245 if (item) {
1246 Py_INCREF(item);
1247 self->extra->children[index] = item;
1248 } else {
1249 self->extra->length--;
1250 for (i = index; i < self->extra->length; i++)
1251 self->extra->children[i] = self->extra->children[i+1];
1252 }
1253
1254 Py_DECREF(old);
1255
1256 return 0;
1257}
1258
1259static PyMethodDef element_methods[] = {
1260
1261 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1262
1263 {"get", (PyCFunction) element_get, METH_VARARGS},
1264 {"set", (PyCFunction) element_set, METH_VARARGS},
1265
1266 {"find", (PyCFunction) element_find, METH_VARARGS},
1267 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1268 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1269
1270 {"append", (PyCFunction) element_append, METH_VARARGS},
1271 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1272 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1273
1274 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1275 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1276
1277 {"items", (PyCFunction) element_items, METH_VARARGS},
1278 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1279
1280 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1281
1282 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1283 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1284
1285 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1286 C objects correctly, so we have to fake it using a __reduce__-
1287 based hack (see the element_reduce implementation above for
1288 details). */
1289
1290 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1291 using a runtime test to figure out if we need to fake things
1292 or now (see the init code below). The following entry is
1293 enabled only if the hack is needed. */
1294
1295 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1296
1297 {NULL, NULL}
1298};
1299
1300static PyObject*
1301element_getattr(ElementObject* self, char* name)
1302{
1303 PyObject* res;
1304
1305 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1306 if (res)
1307 return res;
1308
1309 PyErr_Clear();
1310
1311 if (strcmp(name, "tag") == 0)
1312 res = self->tag;
1313 else if (strcmp(name, "text") == 0)
1314 res = element_get_text(self);
1315 else if (strcmp(name, "tail") == 0) {
1316 res = element_get_tail(self);
1317 } else if (strcmp(name, "attrib") == 0) {
1318 if (!self->extra)
1319 element_new_extra(self, NULL);
1320 res = element_get_attrib(self);
1321 } else {
1322 PyErr_SetString(PyExc_AttributeError, name);
1323 return NULL;
1324 }
1325
1326 if (!res)
1327 return NULL;
1328
1329 Py_INCREF(res);
1330 return res;
1331}
1332
1333static int
1334element_setattr(ElementObject* self, const char* name, PyObject* value)
1335{
1336 if (value == NULL) {
1337 PyErr_SetString(
1338 PyExc_AttributeError,
1339 "can't delete element attributes"
1340 );
1341 return -1;
1342 }
1343
1344 if (strcmp(name, "tag") == 0) {
1345 Py_DECREF(self->tag);
1346 self->tag = value;
1347 Py_INCREF(self->tag);
1348 } else if (strcmp(name, "text") == 0) {
1349 Py_DECREF(JOIN_OBJ(self->text));
1350 self->text = value;
1351 Py_INCREF(self->text);
1352 } else if (strcmp(name, "tail") == 0) {
1353 Py_DECREF(JOIN_OBJ(self->tail));
1354 self->tail = value;
1355 Py_INCREF(self->tail);
1356 } else if (strcmp(name, "attrib") == 0) {
1357 if (!self->extra)
1358 element_new_extra(self, NULL);
1359 Py_DECREF(self->extra->attrib);
1360 self->extra->attrib = value;
1361 Py_INCREF(self->extra->attrib);
1362 } else {
1363 PyErr_SetString(PyExc_AttributeError, name);
1364 return -1;
1365 }
1366
1367 return 0;
1368}
1369
1370static PySequenceMethods element_as_sequence = {
1371 (inquiry) element_length,
1372 0, /* sq_concat */
1373 0, /* sq_repeat */
1374 (intargfunc) element_getitem,
1375 (intintargfunc) element_getslice,
1376 (intobjargproc) element_setitem,
1377 (intintobjargproc) element_setslice,
1378};
1379
1380statichere PyTypeObject Element_Type = {
1381 PyObject_HEAD_INIT(NULL)
1382 0, "Element", sizeof(ElementObject), 0,
1383 /* methods */
1384 (destructor)element_dealloc, /* tp_dealloc */
1385 0, /* tp_print */
1386 (getattrfunc)element_getattr, /* tp_getattr */
1387 (setattrfunc)element_setattr, /* tp_setattr */
1388 0, /* tp_compare */
1389 (reprfunc)element_repr, /* tp_repr */
1390 0, /* tp_as_number */
1391 &element_as_sequence, /* tp_as_sequence */
1392};
1393
1394/* ==================================================================== */
1395/* the tree builder type */
1396
1397typedef struct {
1398 PyObject_HEAD
1399
1400 PyObject* root; /* root node (first created node) */
1401
1402 ElementObject* this; /* current node */
1403 ElementObject* last; /* most recently created node */
1404
1405 PyObject* data; /* data collector (string or list), or NULL */
1406
1407 PyObject* stack; /* element stack */
1408 int index; /* current stack size (0=empty) */
1409
1410 /* element tracing */
1411 PyObject* events; /* list of events, or NULL if not collecting */
1412 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1413 PyObject* end_event_obj;
1414 PyObject* start_ns_event_obj;
1415 PyObject* end_ns_event_obj;
1416
1417} TreeBuilderObject;
1418
1419staticforward PyTypeObject TreeBuilder_Type;
1420
1421#define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type)
1422
1423/* -------------------------------------------------------------------- */
1424/* constructor and destructor */
1425
1426LOCAL(PyObject*)
1427treebuilder_new(void)
1428{
1429 TreeBuilderObject* self;
1430
1431 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1432 if (self == NULL)
1433 return NULL;
1434
1435 self->root = NULL;
1436
1437 Py_INCREF(Py_None);
1438 self->this = (ElementObject*) Py_None;
1439
1440 Py_INCREF(Py_None);
1441 self->last = (ElementObject*) Py_None;
1442
1443 self->data = NULL;
1444
1445 self->stack = PyList_New(20);
1446 self->index = 0;
1447
1448 self->events = NULL;
1449 self->start_event_obj = self->end_event_obj = NULL;
1450 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1451
1452 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1453
1454 return (PyObject*) self;
1455}
1456
1457static PyObject*
1458treebuilder(PyObject* _self, PyObject* args)
1459{
1460 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1461 return NULL;
1462
1463 return treebuilder_new();
1464}
1465
1466static void
1467treebuilder_dealloc(TreeBuilderObject* self)
1468{
1469 Py_XDECREF(self->end_ns_event_obj);
1470 Py_XDECREF(self->start_ns_event_obj);
1471 Py_XDECREF(self->end_event_obj);
1472 Py_XDECREF(self->start_event_obj);
1473 Py_XDECREF(self->events);
1474 Py_DECREF(self->stack);
1475 Py_XDECREF(self->data);
1476 Py_DECREF(self->last);
1477 Py_DECREF(self->this);
1478 Py_XDECREF(self->root);
1479
1480 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1481
1482 PyObject_Del(self);
1483}
1484
1485/* -------------------------------------------------------------------- */
1486/* handlers */
1487
1488LOCAL(PyObject*)
1489treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1490 PyObject* standalone)
1491{
1492 Py_RETURN_NONE;
1493}
1494
1495LOCAL(PyObject*)
1496treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1497 PyObject* attrib)
1498{
1499 PyObject* node;
1500 PyObject* this;
1501
1502 if (self->data) {
1503 if (self->this == self->last) {
1504 self->last->text = JOIN_SET(
1505 self->data, PyList_CheckExact(self->data)
1506 );
1507 } else {
1508 self->last->tail = JOIN_SET(
1509 self->data, PyList_CheckExact(self->data)
1510 );
1511 }
1512 self->data = NULL;
1513 }
1514
1515 node = element_new(tag, attrib);
1516 if (!node)
1517 return NULL;
1518
1519 this = (PyObject*) self->this;
1520
1521 if (this != Py_None) {
1522 if (element_add_subelement((ElementObject*) this, node) < 0)
1523 return NULL;
1524 } else {
1525 if (self->root) {
1526 PyErr_SetString(
1527 PyExc_SyntaxError,
1528 "multiple elements on top level"
1529 );
1530 return NULL;
1531 }
1532 Py_INCREF(node);
1533 self->root = node;
1534 }
1535
1536 if (self->index < PyList_GET_SIZE(self->stack)) {
1537 if (PyList_SetItem(self->stack, self->index, this) < 0)
1538 return NULL;
1539 Py_INCREF(this);
1540 } else {
1541 if (PyList_Append(self->stack, this) < 0)
1542 return NULL;
1543 }
1544 self->index++;
1545
1546 Py_DECREF(this);
1547 Py_INCREF(node);
1548 self->this = (ElementObject*) node;
1549
1550 Py_DECREF(self->last);
1551 Py_INCREF(node);
1552 self->last = (ElementObject*) node;
1553
1554 if (self->start_event_obj) {
1555 PyObject* res;
1556 PyObject* action = self->start_event_obj;
1557 res = PyTuple_New(2);
1558 if (res) {
1559 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1560 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1561 PyList_Append(self->events, res);
1562 Py_DECREF(res);
1563 } else
1564 PyErr_Clear(); /* FIXME: propagate error */
1565 }
1566
1567 return node;
1568}
1569
1570LOCAL(PyObject*)
1571treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1572{
1573 if (!self->data) {
1574 /* store the first item as is */
1575 Py_INCREF(data); self->data = data;
1576 } else {
1577 /* more than one item; use a list to collect items */
1578 if (PyString_CheckExact(self->data) && self->data->ob_refcnt == 1 &&
1579 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1580 /* expat often generates single character data sections; handle
1581 the most common case by resizing the existing string... */
1582 int size = PyString_GET_SIZE(self->data);
1583 if (_PyString_Resize(&self->data, size + 1) < 0)
1584 return NULL;
1585 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1586 } else if (PyList_CheckExact(self->data)) {
1587 if (PyList_Append(self->data, data) < 0)
1588 return NULL;
1589 } else {
1590 PyObject* list = PyList_New(2);
1591 if (!list)
1592 return NULL;
1593 PyList_SET_ITEM(list, 0, self->data);
1594 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1595 self->data = list;
1596 }
1597 }
1598
1599 Py_RETURN_NONE;
1600}
1601
1602LOCAL(PyObject*)
1603treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1604{
1605 PyObject* item;
1606
1607 if (self->data) {
1608 if (self->this == self->last) {
1609 self->last->text = JOIN_SET(
1610 self->data, PyList_CheckExact(self->data)
1611 );
1612 } else {
1613 self->last->tail = JOIN_SET(
1614 self->data, PyList_CheckExact(self->data)
1615 );
1616 }
1617 self->data = NULL;
1618 }
1619
1620 if (self->index == 0) {
1621 PyErr_SetString(
1622 PyExc_IndexError,
1623 "pop from empty stack"
1624 );
1625 return NULL;
1626 }
1627
1628 self->index--;
1629
1630 item = PyList_GET_ITEM(self->stack, self->index);
1631 Py_INCREF(item);
1632
1633 Py_DECREF(self->last);
1634
1635 self->last = (ElementObject*) self->this;
1636 self->this = (ElementObject*) item;
1637
1638 if (self->end_event_obj) {
1639 PyObject* res;
1640 PyObject* action = self->end_event_obj;
1641 PyObject* node = (PyObject*) self->last;
1642 res = PyTuple_New(2);
1643 if (res) {
1644 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1645 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1646 PyList_Append(self->events, res);
1647 Py_DECREF(res);
1648 } else
1649 PyErr_Clear(); /* FIXME: propagate error */
1650 }
1651
1652 Py_INCREF(self->last);
1653 return (PyObject*) self->last;
1654}
1655
1656LOCAL(void)
1657treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1658 const char* prefix, const char *uri)
1659{
1660 PyObject* res;
1661 PyObject* action;
1662 PyObject* parcel;
1663
1664 if (!self->events)
1665 return;
1666
1667 if (start) {
1668 if (!self->start_ns_event_obj)
1669 return;
1670 action = self->start_ns_event_obj;
1671 /* FIXME: prefix and uri use utf-8 encoding! */
1672 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1673 if (!parcel)
1674 return;
1675 Py_INCREF(action);
1676 } else {
1677 if (!self->end_ns_event_obj)
1678 return;
1679 action = self->end_ns_event_obj;
1680 Py_INCREF(action);
1681 parcel = Py_None;
1682 Py_INCREF(parcel);
1683 }
1684
1685 res = PyTuple_New(2);
1686
1687 if (res) {
1688 PyTuple_SET_ITEM(res, 0, action);
1689 PyTuple_SET_ITEM(res, 1, parcel);
1690 PyList_Append(self->events, res);
1691 Py_DECREF(res);
1692 } else
1693 PyErr_Clear(); /* FIXME: propagate error */
1694}
1695
1696/* -------------------------------------------------------------------- */
1697/* methods (in alphabetical order) */
1698
1699static PyObject*
1700treebuilder_data(TreeBuilderObject* self, PyObject* args)
1701{
1702 PyObject* data;
1703 if (!PyArg_ParseTuple(args, "O:data", &data))
1704 return NULL;
1705
1706 return treebuilder_handle_data(self, data);
1707}
1708
1709static PyObject*
1710treebuilder_end(TreeBuilderObject* self, PyObject* args)
1711{
1712 PyObject* tag;
1713 if (!PyArg_ParseTuple(args, "O:end", &tag))
1714 return NULL;
1715
1716 return treebuilder_handle_end(self, tag);
1717}
1718
1719LOCAL(PyObject*)
1720treebuilder_done(TreeBuilderObject* self)
1721{
1722 PyObject* res;
1723
1724 /* FIXME: check stack size? */
1725
1726 if (self->root)
1727 res = self->root;
1728 else
1729 res = Py_None;
1730
1731 Py_INCREF(res);
1732 return res;
1733}
1734
1735static PyObject*
1736treebuilder_close(TreeBuilderObject* self, PyObject* args)
1737{
1738 if (!PyArg_ParseTuple(args, ":close"))
1739 return NULL;
1740
1741 return treebuilder_done(self);
1742}
1743
1744static PyObject*
1745treebuilder_start(TreeBuilderObject* self, PyObject* args)
1746{
1747 PyObject* tag;
1748 PyObject* attrib = Py_None;
1749 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1750 return NULL;
1751
1752 return treebuilder_handle_start(self, tag, attrib);
1753}
1754
1755static PyObject*
1756treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1757{
1758 PyObject* encoding;
1759 PyObject* standalone;
1760 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1761 return NULL;
1762
1763 return treebuilder_handle_xml(self, encoding, standalone);
1764}
1765
1766static PyMethodDef treebuilder_methods[] = {
1767 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1768 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1769 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1770 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1771 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1772 {NULL, NULL}
1773};
1774
1775static PyObject*
1776treebuilder_getattr(TreeBuilderObject* self, char* name)
1777{
1778 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1779}
1780
1781statichere PyTypeObject TreeBuilder_Type = {
1782 PyObject_HEAD_INIT(NULL)
1783 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
1784 /* methods */
1785 (destructor)treebuilder_dealloc, /* tp_dealloc */
1786 0, /* tp_print */
1787 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1788};
1789
1790/* ==================================================================== */
1791/* the expat interface */
1792
1793#if defined(USE_EXPAT)
1794
1795#include "expat.h"
1796
1797#if defined(USE_PYEXPAT_CAPI)
1798#include "pyexpat.h"
1799static struct PyExpat_CAPI* expat_capi;
1800#define EXPAT(func) (expat_capi->func)
1801#else
1802#define EXPAT(func) (XML_##func)
1803#endif
1804
1805typedef struct {
1806 PyObject_HEAD
1807
1808 XML_Parser parser;
1809
1810 PyObject* target;
1811 PyObject* entity;
1812
1813 PyObject* names;
1814
1815 PyObject* handle_xml;
1816 PyObject* handle_start;
1817 PyObject* handle_data;
1818 PyObject* handle_end;
1819
1820 PyObject* handle_comment;
1821 PyObject* handle_pi;
1822
1823} XMLParserObject;
1824
1825staticforward PyTypeObject XMLParser_Type;
1826
1827/* helpers */
1828
1829#if defined(Py_USING_UNICODE)
1830LOCAL(int)
1831checkstring(const char* string, int size)
1832{
1833 int i;
1834
1835 /* check if an 8-bit string contains UTF-8 characters */
1836 for (i = 0; i < size; i++)
1837 if (string[i] & 0x80)
1838 return 1;
1839
1840 return 0;
1841}
1842#endif
1843
1844LOCAL(PyObject*)
1845makestring(const char* string, int size)
1846{
1847 /* convert a UTF-8 string to either a 7-bit ascii string or a
1848 Unicode string */
1849
1850#if defined(Py_USING_UNICODE)
1851 if (checkstring(string, size))
1852 return PyUnicode_DecodeUTF8(string, size, "strict");
1853#endif
1854
1855 return PyString_FromStringAndSize(string, size);
1856}
1857
1858LOCAL(PyObject*)
1859makeuniversal(XMLParserObject* self, const char* string)
1860{
1861 /* convert a UTF-8 tag/attribute name from the expat parser
1862 to a universal name string */
1863
1864 int size = strlen(string);
1865 PyObject* key;
1866 PyObject* value;
1867
1868 /* look the 'raw' name up in the names dictionary */
1869 key = PyString_FromStringAndSize(string, size);
1870 if (!key)
1871 return NULL;
1872
1873 value = PyDict_GetItem(self->names, key);
1874
1875 if (value) {
1876 Py_INCREF(value);
1877 } else {
1878 /* new name. convert to universal name, and decode as
1879 necessary */
1880
1881 PyObject* tag;
1882 char* p;
1883 int i;
1884
1885 /* look for namespace separator */
1886 for (i = 0; i < size; i++)
1887 if (string[i] == '}')
1888 break;
1889 if (i != size) {
1890 /* convert to universal name */
1891 tag = PyString_FromStringAndSize(NULL, size+1);
1892 p = PyString_AS_STRING(tag);
1893 p[0] = '{';
1894 memcpy(p+1, string, size);
1895 size++;
1896 } else {
1897 /* plain name; use key as tag */
1898 Py_INCREF(key);
1899 tag = key;
1900 }
1901
1902 /* decode universal name */
1903#if defined(Py_USING_UNICODE)
1904 /* inline makestring, to avoid duplicating the source string if
1905 it's not an utf-8 string */
1906 p = PyString_AS_STRING(tag);
1907 if (checkstring(p, size)) {
1908 value = PyUnicode_DecodeUTF8(p, size, "strict");
1909 Py_DECREF(tag);
1910 if (!value) {
1911 Py_DECREF(key);
1912 return NULL;
1913 }
1914 } else
1915#endif
1916 value = tag; /* use tag as is */
1917
1918 /* add to names dictionary */
1919 if (PyDict_SetItem(self->names, key, value) < 0) {
1920 Py_DECREF(key);
1921 Py_DECREF(value);
1922 return NULL;
1923 }
1924 }
1925
1926 Py_DECREF(key);
1927 return value;
1928}
1929
1930/* -------------------------------------------------------------------- */
1931/* handlers */
1932
1933static void
1934expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1935 int data_len)
1936{
1937 PyObject* key;
1938 PyObject* value;
1939 PyObject* res;
1940
1941 if (data_len < 2 || data_in[0] != '&')
1942 return;
1943
1944 key = makestring(data_in + 1, data_len - 2);
1945 if (!key)
1946 return;
1947
1948 value = PyDict_GetItem(self->entity, key);
1949
1950 if (value) {
1951 if (TreeBuilder_CheckExact(self->target))
1952 res = treebuilder_handle_data(
1953 (TreeBuilderObject*) self->target, value
1954 );
1955 else if (self->handle_data)
1956 res = PyObject_CallFunction(self->handle_data, "O", value);
1957 else
1958 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001959 Py_XDECREF(res);
1960 } else {
1961 PyErr_Format(
1962 PyExc_SyntaxError, "undefined entity &%s;: line %d, column %d",
1963 PyString_AS_STRING(key),
1964 EXPAT(GetErrorLineNumber)(self->parser),
1965 EXPAT(GetErrorColumnNumber)(self->parser)
1966 );
1967 }
1968
1969 Py_DECREF(key);
1970}
1971
1972static void
1973expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
1974 const XML_Char **attrib_in)
1975{
1976 PyObject* res;
1977 PyObject* tag;
1978 PyObject* attrib;
1979 int ok;
1980
1981 /* tag name */
1982 tag = makeuniversal(self, tag_in);
1983 if (!tag)
1984 return; /* parser will look for errors */
1985
1986 /* attributes */
1987 if (attrib_in[0]) {
1988 attrib = PyDict_New();
1989 if (!attrib)
1990 return;
1991 while (attrib_in[0] && attrib_in[1]) {
1992 PyObject* key = makeuniversal(self, attrib_in[0]);
1993 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
1994 if (!key || !value) {
1995 Py_XDECREF(value);
1996 Py_XDECREF(key);
1997 Py_DECREF(attrib);
1998 return;
1999 }
2000 ok = PyDict_SetItem(attrib, key, value);
2001 Py_DECREF(value);
2002 Py_DECREF(key);
2003 if (ok < 0) {
2004 Py_DECREF(attrib);
2005 return;
2006 }
2007 attrib_in += 2;
2008 }
2009 } else {
2010 Py_INCREF(Py_None);
2011 attrib = Py_None;
2012 }
2013
2014 if (TreeBuilder_CheckExact(self->target))
2015 /* shortcut */
2016 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2017 tag, attrib);
2018 else if (self->handle_start)
2019 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2020 else
2021 res = NULL;
2022
2023 Py_DECREF(tag);
2024 Py_DECREF(attrib);
2025
2026 Py_XDECREF(res);
2027}
2028
2029static void
2030expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2031 int data_len)
2032{
2033 PyObject* data;
2034 PyObject* res;
2035
2036 data = makestring(data_in, data_len);
2037
2038 if (TreeBuilder_CheckExact(self->target))
2039 /* shortcut */
2040 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2041 else if (self->handle_data)
2042 res = PyObject_CallFunction(self->handle_data, "O", data);
2043 else
2044 res = NULL;
2045
2046 Py_DECREF(data);
2047
2048 Py_XDECREF(res);
2049}
2050
2051static void
2052expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2053{
2054 PyObject* tag;
2055 PyObject* res = NULL;
2056
2057 if (TreeBuilder_CheckExact(self->target))
2058 /* shortcut */
2059 /* the standard tree builder doesn't look at the end tag */
2060 res = treebuilder_handle_end(
2061 (TreeBuilderObject*) self->target, Py_None
2062 );
2063 else if (self->handle_end) {
2064 tag = makeuniversal(self, tag_in);
2065 if (tag) {
2066 res = PyObject_CallFunction(self->handle_end, "O", tag);
2067 Py_DECREF(tag);
2068 }
2069 }
2070
2071 Py_XDECREF(res);
2072}
2073
2074static void
2075expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2076 const XML_Char *uri)
2077{
2078 treebuilder_handle_namespace(
2079 (TreeBuilderObject*) self->target, 1, prefix, uri
2080 );
2081}
2082
2083static void
2084expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2085{
2086 treebuilder_handle_namespace(
2087 (TreeBuilderObject*) self->target, 0, NULL, NULL
2088 );
2089}
2090
2091static void
2092expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2093{
2094 PyObject* comment;
2095 PyObject* res;
2096
2097 if (self->handle_comment) {
2098 comment = makestring(comment_in, strlen(comment_in));
2099 if (comment) {
2100 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2101 Py_XDECREF(res);
2102 Py_DECREF(comment);
2103 }
2104 }
2105}
2106
2107static void
2108expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2109 const XML_Char* data_in)
2110{
2111 PyObject* target;
2112 PyObject* data;
2113 PyObject* res;
2114
2115 if (self->handle_pi) {
2116 target = makestring(target_in, strlen(target_in));
2117 data = makestring(data_in, strlen(data_in));
2118 if (target && data) {
2119 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2120 Py_XDECREF(res);
2121 Py_DECREF(data);
2122 Py_DECREF(target);
2123 } else {
2124 Py_XDECREF(data);
2125 Py_XDECREF(target);
2126 }
2127 }
2128}
2129
2130#if defined(Py_USING_UNICODE)
2131static int
2132expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2133 XML_Encoding *info)
2134{
2135 PyObject* u;
2136 Py_UNICODE* p;
2137 unsigned char s[256];
2138 int i;
2139
2140 memset(info, 0, sizeof(XML_Encoding));
2141
2142 for (i = 0; i < 256; i++)
2143 s[i] = i;
2144
2145 u = PyUnicode_Decode(s, 256, name, "replace");
2146 if (!u)
2147 return XML_STATUS_ERROR;
2148
2149 if (PyUnicode_GET_SIZE(u) != 256) {
2150 Py_DECREF(u);
2151 return XML_STATUS_ERROR;
2152 }
2153
2154 p = PyUnicode_AS_UNICODE(u);
2155
2156 for (i = 0; i < 256; i++) {
2157 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2158 info->map[i] = p[i];
2159 else
2160 info->map[i] = -1;
2161 }
2162
2163 Py_DECREF(u);
2164
2165 return XML_STATUS_OK;
2166}
2167#endif
2168
2169/* -------------------------------------------------------------------- */
2170/* constructor and destructor */
2171
2172static PyObject*
2173xmlparser(PyObject* _self, PyObject* args, PyObject* kw)
2174{
2175 XMLParserObject* self;
2176 /* FIXME: does this need to be static? */
2177 static XML_Memory_Handling_Suite memory_handler;
2178
2179 PyObject* target = NULL;
2180 char* encoding = NULL;
2181 static PY_CONST char* kwlist[] = { "target", "encoding", NULL };
2182 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2183 &target, &encoding))
2184 return NULL;
2185
2186#if defined(USE_PYEXPAT_CAPI)
2187 if (!expat_capi) {
2188 PyErr_SetString(
2189 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2190 );
2191 return NULL;
2192 }
2193#endif
2194
2195 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2196 if (self == NULL)
2197 return NULL;
2198
2199 self->entity = PyDict_New();
2200 if (!self->entity) {
2201 PyObject_Del(self);
2202 return NULL; /* FIXME: cleanup on error */
2203 }
2204
2205 self->names = PyDict_New();
2206 if (!self->names) {
2207 PyObject_Del(self);
2208 return NULL; /* FIXME: cleanup on error */
2209 }
2210
2211 memory_handler.malloc_fcn = PyObject_Malloc;
2212 memory_handler.realloc_fcn = PyObject_Realloc;
2213 memory_handler.free_fcn = PyObject_Free;
2214
2215 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2216 if (!self->parser) {
2217 PyErr_NoMemory();
2218 return NULL; /* FIXME: cleanup on error */
2219 }
2220
2221 /* setup target handlers */
2222 if (!target) {
2223 target = treebuilder_new();
2224 if (!target) {
2225 PyObject_Del(self);
2226 return NULL; /* FIXME: cleanup on error */
2227 }
2228 } else
2229 Py_INCREF(target);
2230 self->target = target;
2231
2232 self->handle_xml = PyObject_GetAttrString(target, "xml");
2233 self->handle_start = PyObject_GetAttrString(target, "start");
2234 self->handle_data = PyObject_GetAttrString(target, "data");
2235 self->handle_end = PyObject_GetAttrString(target, "end");
2236 self->handle_comment = PyObject_GetAttrString(target, "comment");
2237 self->handle_pi = PyObject_GetAttrString(target, "pi");
2238
2239 PyErr_Clear();
2240
2241 /* configure parser */
2242 EXPAT(SetUserData)(self->parser, self);
2243 EXPAT(SetElementHandler)(
2244 self->parser,
2245 (XML_StartElementHandler) expat_start_handler,
2246 (XML_EndElementHandler) expat_end_handler
2247 );
2248 EXPAT(SetDefaultHandlerExpand)(
2249 self->parser,
2250 (XML_DefaultHandler) expat_default_handler
2251 );
2252 EXPAT(SetCharacterDataHandler)(
2253 self->parser,
2254 (XML_CharacterDataHandler) expat_data_handler
2255 );
2256 if (self->handle_comment)
2257 EXPAT(SetCommentHandler)(
2258 self->parser,
2259 (XML_CommentHandler) expat_comment_handler
2260 );
2261 if (self->handle_pi)
2262 EXPAT(SetProcessingInstructionHandler)(
2263 self->parser,
2264 (XML_ProcessingInstructionHandler) expat_pi_handler
2265 );
2266#if defined(Py_USING_UNICODE)
2267 EXPAT(SetUnknownEncodingHandler)(
2268 self->parser,
2269 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2270 );
2271#endif
2272
2273 ALLOC(sizeof(XMLParserObject), "create expatparser");
2274
2275 return (PyObject*) self;
2276}
2277
2278static void
2279xmlparser_dealloc(XMLParserObject* self)
2280{
2281 EXPAT(ParserFree)(self->parser);
2282
2283 Py_XDECREF(self->handle_pi);
2284 Py_XDECREF(self->handle_comment);
2285 Py_XDECREF(self->handle_end);
2286 Py_XDECREF(self->handle_data);
2287 Py_XDECREF(self->handle_start);
2288 Py_XDECREF(self->handle_xml);
2289
2290 Py_DECREF(self->target);
2291 Py_DECREF(self->entity);
2292 Py_DECREF(self->names);
2293
2294 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2295
2296 PyObject_Del(self);
2297}
2298
2299/* -------------------------------------------------------------------- */
2300/* methods (in alphabetical order) */
2301
2302LOCAL(PyObject*)
2303expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2304{
2305 int ok;
2306
2307 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2308
2309 if (PyErr_Occurred())
2310 return NULL;
2311
2312 if (!ok) {
2313 PyErr_Format(
2314 PyExc_SyntaxError, "%s: line %d, column %d",
2315 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2316 EXPAT(GetErrorLineNumber)(self->parser),
2317 EXPAT(GetErrorColumnNumber)(self->parser)
2318 );
2319 return NULL;
2320 }
2321
2322 Py_RETURN_NONE;
2323}
2324
2325static PyObject*
2326xmlparser_close(XMLParserObject* self, PyObject* args)
2327{
2328 /* end feeding data to parser */
2329
2330 PyObject* res;
2331 if (!PyArg_ParseTuple(args, ":close"))
2332 return NULL;
2333
2334 res = expat_parse(self, "", 0, 1);
2335
2336 if (res && TreeBuilder_CheckExact(self->target)) {
2337 Py_DECREF(res);
2338 return treebuilder_done((TreeBuilderObject*) self->target);
2339 }
2340
2341 return res;
2342}
2343
2344static PyObject*
2345xmlparser_feed(XMLParserObject* self, PyObject* args)
2346{
2347 /* feed data to parser */
2348
2349 char* data;
2350 int data_len;
2351 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2352 return NULL;
2353
2354 return expat_parse(self, data, data_len, 0);
2355}
2356
2357static PyObject*
2358xmlparser_parse(XMLParserObject* self, PyObject* args)
2359{
2360 /* (internal) parse until end of input stream */
2361
2362 PyObject* reader;
2363 PyObject* buffer;
2364 PyObject* res;
2365
2366 PyObject* fileobj;
2367 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2368 return NULL;
2369
2370 reader = PyObject_GetAttrString(fileobj, "read");
2371 if (!reader)
2372 return NULL;
2373
2374 /* read from open file object */
2375 for (;;) {
2376
2377 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2378
2379 if (!buffer) {
2380 /* read failed (e.g. due to KeyboardInterrupt) */
2381 Py_DECREF(reader);
2382 return NULL;
2383 }
2384
2385 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2386 Py_DECREF(buffer);
2387 break;
2388 }
2389
2390 res = expat_parse(
2391 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2392 );
2393
2394 Py_DECREF(buffer);
2395
2396 if (!res) {
2397 Py_DECREF(reader);
2398 return NULL;
2399 }
2400 Py_DECREF(res);
2401
2402 }
2403
2404 Py_DECREF(reader);
2405
2406 res = expat_parse(self, "", 0, 1);
2407
2408 if (res && TreeBuilder_CheckExact(self->target)) {
2409 Py_DECREF(res);
2410 return treebuilder_done((TreeBuilderObject*) self->target);
2411 }
2412
2413 return res;
2414}
2415
2416static PyObject*
2417xmlparser_setevents(XMLParserObject* self, PyObject* args)
2418{
2419 /* activate element event reporting */
2420
2421 int i;
2422 TreeBuilderObject* target;
2423
2424 PyObject* events; /* event collector */
2425 PyObject* event_set = Py_None;
2426 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2427 &event_set))
2428 return NULL;
2429
2430 if (!TreeBuilder_CheckExact(self->target)) {
2431 PyErr_SetString(
2432 PyExc_TypeError,
2433 "event handling only supported for cElementTree.Treebuilder "
2434 "targets"
2435 );
2436 return NULL;
2437 }
2438
2439 target = (TreeBuilderObject*) self->target;
2440
2441 Py_INCREF(events);
2442 Py_XDECREF(target->events);
2443 target->events = events;
2444
2445 /* clear out existing events */
2446 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2447 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2448 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2449 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2450
2451 if (event_set == Py_None) {
2452 /* default is "end" only */
2453 target->end_event_obj = PyString_FromString("end");
2454 Py_RETURN_NONE;
2455 }
2456
2457 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2458 goto error;
2459
2460 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2461 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2462 char* event;
2463 if (!PyString_Check(item))
2464 goto error;
2465 event = PyString_AS_STRING(item);
2466 if (strcmp(event, "start") == 0) {
2467 Py_INCREF(item);
2468 target->start_event_obj = item;
2469 } else if (strcmp(event, "end") == 0) {
2470 Py_INCREF(item);
2471 Py_XDECREF(target->end_event_obj);
2472 target->end_event_obj = item;
2473 } else if (strcmp(event, "start-ns") == 0) {
2474 Py_INCREF(item);
2475 Py_XDECREF(target->start_ns_event_obj);
2476 target->start_ns_event_obj = item;
2477 EXPAT(SetNamespaceDeclHandler)(
2478 self->parser,
2479 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2480 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2481 );
2482 } else if (strcmp(event, "end-ns") == 0) {
2483 Py_INCREF(item);
2484 Py_XDECREF(target->end_ns_event_obj);
2485 target->end_ns_event_obj = item;
2486 EXPAT(SetNamespaceDeclHandler)(
2487 self->parser,
2488 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2489 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2490 );
2491 } else {
2492 PyErr_Format(
2493 PyExc_ValueError,
2494 "unknown event '%s'", event
2495 );
2496 return NULL;
2497 }
2498 }
2499
2500 Py_RETURN_NONE;
2501
2502 error:
2503 PyErr_SetString(
2504 PyExc_TypeError,
2505 "invalid event tuple"
2506 );
2507 return NULL;
2508}
2509
2510static PyMethodDef xmlparser_methods[] = {
2511 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2512 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2513 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2514 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2515 {NULL, NULL}
2516};
2517
2518static PyObject*
2519xmlparser_getattr(XMLParserObject* self, char* name)
2520{
2521 PyObject* res;
2522
2523 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2524 if (res)
2525 return res;
2526
2527 PyErr_Clear();
2528
2529 if (strcmp(name, "entity") == 0)
2530 res = self->entity;
2531 else if (strcmp(name, "target") == 0)
2532 res = self->target;
2533 else if (strcmp(name, "version") == 0) {
2534 char buffer[100];
2535 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2536 XML_MINOR_VERSION, XML_MICRO_VERSION);
2537 return PyString_FromString(buffer);
2538 } else {
2539 PyErr_SetString(PyExc_AttributeError, name);
2540 return NULL;
2541 }
2542
2543 Py_INCREF(res);
2544 return res;
2545}
2546
2547statichere PyTypeObject XMLParser_Type = {
2548 PyObject_HEAD_INIT(NULL)
2549 0, "XMLParser", sizeof(XMLParserObject), 0,
2550 /* methods */
2551 (destructor)xmlparser_dealloc, /* tp_dealloc */
2552 0, /* tp_print */
2553 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2554};
2555
2556#endif
2557
2558/* ==================================================================== */
2559/* python module interface */
2560
2561static PyMethodDef _functions[] = {
2562 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2563 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2564 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2565#if defined(USE_EXPAT)
2566 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2567 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2568#endif
2569 {NULL, NULL}
2570};
2571
2572DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002573init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002574{
2575 PyObject* m;
2576 PyObject* g;
2577 char* bootstrap;
2578#if defined(USE_PYEXPAT_CAPI)
2579 struct PyExpat_CAPI* capi;
2580#endif
2581
2582 /* Patch object type */
2583 Element_Type.ob_type = TreeBuilder_Type.ob_type = &PyType_Type;
2584#if defined(USE_EXPAT)
2585 XMLParser_Type.ob_type = &PyType_Type;
2586#endif
2587
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002588 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002589
2590 /* python glue code */
2591
2592 g = PyDict_New();
2593
2594 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2595
2596 bootstrap = (
2597
2598#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2599 "from __future__ import generators\n" /* enable yield under 2.2 */
2600#endif
2601
2602 "from copy import copy, deepcopy\n"
2603
2604 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002605 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002606 "except ImportError:\n"
2607 " import ElementTree\n"
2608 "ET = ElementTree\n"
2609 "del ElementTree\n"
2610
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002611 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002612
2613 "try:\n" /* check if copy works as is */
2614 " copy(cElementTree.Element('x'))\n"
2615 "except:\n"
2616 " def copyelement(elem):\n"
2617 " return elem\n"
2618
2619 "def Comment(text=None):\n" /* public */
2620 " element = cElementTree.Element(ET.Comment)\n"
2621 " element.text = text\n"
2622 " return element\n"
2623 "cElementTree.Comment = Comment\n"
2624
2625 "class ElementTree(ET.ElementTree):\n" /* public */
2626 " def parse(self, source, parser=None):\n"
2627 " if not hasattr(source, 'read'):\n"
2628 " source = open(source, 'rb')\n"
2629 " if parser is not None:\n"
2630 " while 1:\n"
2631 " data = source.read(65536)\n"
2632 " if not data:\n"
2633 " break\n"
2634 " parser.feed(data)\n"
2635 " self._root = parser.close()\n"
2636 " else:\n"
2637 " parser = cElementTree.XMLParser()\n"
2638 " self._root = parser._parse(source)\n"
2639 " return self._root\n"
2640 "cElementTree.ElementTree = ElementTree\n"
2641
2642 "def getiterator(node, tag=None):\n" /* helper */
2643 " if tag == '*':\n"
2644 " tag = None\n"
2645#if (PY_VERSION_HEX < 0x02020000)
2646 " nodes = []\n" /* 2.1 doesn't have yield */
2647 " if tag is None or node.tag == tag:\n"
2648 " nodes.append(node)\n"
2649 " for node in node:\n"
2650 " nodes.extend(getiterator(node, tag))\n"
2651 " return nodes\n"
2652#else
2653 " if tag is None or node.tag == tag:\n"
2654 " yield node\n"
2655 " for node in node:\n"
2656 " for node in getiterator(node, tag):\n"
2657 " yield node\n"
2658#endif
2659
2660 "def parse(source, parser=None):\n" /* public */
2661 " tree = ElementTree()\n"
2662 " tree.parse(source, parser)\n"
2663 " return tree\n"
2664 "cElementTree.parse = parse\n"
2665
2666#if (PY_VERSION_HEX < 0x02020000)
2667 "if hasattr(ET, 'iterparse'):\n"
2668 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2669#else
2670 "class iterparse(object):\n"
2671 " root = None\n"
2672 " def __init__(self, file, events=None):\n"
2673 " if not hasattr(file, 'read'):\n"
2674 " file = open(file, 'rb')\n"
2675 " self._file = file\n"
2676 " self._events = events\n"
2677 " def __iter__(self):\n"
2678 " events = []\n"
2679 " b = cElementTree.TreeBuilder()\n"
2680 " p = cElementTree.XMLParser(b)\n"
2681 " p._setevents(events, self._events)\n"
2682 " while 1:\n"
2683 " data = self._file.read(16384)\n"
2684 " if not data:\n"
2685 " break\n"
2686 " p.feed(data)\n"
2687 " for event in events:\n"
2688 " yield event\n"
2689 " del events[:]\n"
2690 " root = p.close()\n"
2691 " for event in events:\n"
2692 " yield event\n"
2693 " self.root = root\n"
2694 "cElementTree.iterparse = iterparse\n"
2695#endif
2696
2697 "def PI(target, text=None):\n" /* public */
2698 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2699 " element.text = target\n"
2700 " if text:\n"
2701 " element.text = element.text + ' ' + text\n"
2702 " return element\n"
2703
2704 " elem = cElementTree.Element(ET.PI)\n"
2705 " elem.text = text\n"
2706 " return elem\n"
2707 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2708
2709 "def XML(text):\n" /* public */
2710 " parser = cElementTree.XMLParser()\n"
2711 " parser.feed(text)\n"
2712 " return parser.close()\n"
2713 "cElementTree.XML = cElementTree.fromstring = XML\n"
2714
2715 "def XMLID(text):\n" /* public */
2716 " tree = XML(text)\n"
2717 " ids = {}\n"
2718 " for elem in tree.getiterator():\n"
2719 " id = elem.get('id')\n"
2720 " if id:\n"
2721 " ids[id] = elem\n"
2722 " return tree, ids\n"
2723 "cElementTree.XMLID = XMLID\n"
2724
2725 "cElementTree.dump = ET.dump\n"
2726 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2727 "cElementTree.iselement = ET.iselement\n"
2728 "cElementTree.QName = ET.QName\n"
2729 "cElementTree.tostring = ET.tostring\n"
2730 "cElementTree.VERSION = '" VERSION "'\n"
2731 "cElementTree.__version__ = '" VERSION "'\n"
2732 "cElementTree.XMLParserError = SyntaxError\n"
2733
2734 );
2735
2736 PyRun_String(bootstrap, Py_file_input, g, NULL);
2737
2738 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2739
2740 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2741 if (elementtree_copyelement_obj) {
2742 /* reduce hack needed; enable reduce method */
2743 PyMethodDef* mp;
2744 for (mp = element_methods; mp->ml_name; mp++)
2745 if (mp->ml_meth == (PyCFunction) element_reduce) {
2746 mp->ml_name = "__reduce__";
2747 break;
2748 }
2749 } else
2750 PyErr_Clear();
2751 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2752 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2753
2754#if defined(USE_PYEXPAT_CAPI)
2755 /* link against pyexpat, if possible */
2756 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2757 if (capi &&
2758 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2759 capi->size <= sizeof(*expat_capi) &&
2760 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2761 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2762 capi->MICRO_VERSION == XML_MICRO_VERSION)
2763 expat_capi = capi;
2764 else
2765 expat_capi = NULL;
2766#endif
2767
2768}