blob: 6348f521f889c05f3ce843b5ee3becc188d6122a [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
3 * $Id: /work/modules/celementtree/cElementTree.c 1128 2005-12-16T21:57:13.668520Z Fredrik $
4 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
36 *
37 * Copyright (c) 1999-2005 by Secret Labs AB. All rights reserved.
38 * Copyright (c) 1999-2005 by Fredrik Lundh.
39 *
40 * info@pythonware.com
41 * http://www.pythonware.com
42 */
43
Fredrik Lundh6d52b552005-12-16 22:06:43 +000044/* Licensed to PSF under a Contributor Agreement. */
45/* See http://www.python.org/2.4/license for licensing details. */
46
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000047#include "Python.h"
48
49#define VERSION "1.0.5"
50
51/* -------------------------------------------------------------------- */
52/* configuration */
53
54/* Leave defined to include the expat-based XMLParser type */
55#define USE_EXPAT
56
/* Define to route all expat calls via pyexpat's embedded expat library */
58/* #define USE_PYEXPAT_CAPI */
59
60/* An element can hold this many children without extra memory
61 allocations. */
62#define STATIC_CHILDREN 4
63
/* For best performance, choose a value so that 80-90% of all nodes
65 have no more than the given number of children. Set this to zero
66 to minimize the size of the element structure itself (this only
67 helps if you have lots of leaf nodes with attributes). */
68
69/* Also note that pymalloc always allocates blocks in multiples of
70 eight bytes. For the current version of cElementTree, this means
71 that the number of children should be an even number, at least on
72 32-bit platforms. */
73
74/* -------------------------------------------------------------------- */
75
/* Allocation tracing hooks.  Change the "#if 0" below to "#if 1" to keep
   a running byte count and print it on every ALLOC/RELEASE; in the
   default configuration both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
86
/* compiler tweaks: LOCAL(type) declares a file-local helper returning
   "type"; under MSVC the helper is additionally marked __inline and
   __fastcall */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
93
/* compatibility macros: map newer API names onto what older Python
   versions provide */

/* before 2.4: no exact dictionary check */
#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check
#endif

/* before 2.2: no exact list/string checks either */
#if (PY_VERSION_HEX < 0x02020000)
#define PyList_CheckExact PyList_Check
#define PyString_CheckExact PyString_Check
#endif

/* 1.6 up to (but not including) 2.2 */
#if (PY_VERSION_HEX >= 0x01060000) && (PY_VERSION_HEX < 0x02020000)
#define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
#endif

/* 2.5 adds const to some API:s */
#if (PY_VERSION_HEX < 0x02050000)
#define PY_CONST
#else
#define PY_CONST const
#endif

/* provide Py_RETURN_NONE where the headers do not */
#ifndef Py_RETURN_NONE
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
115
/* The text and tail slots of an element are tagged pointers: the lowest
   bit of the stored pointer carries a 'join' flag.  Every use of text
   or tail as an object pointer must therefore go through JOIN_OBJ (see
   the comments in the ElementObject definition for more info).

   JOIN_OBJ(p)        strip the flag bit and yield the PyObject pointer
   JOIN_GET(p)        yield the flag bit (0 or 1)
   JOIN_SET(p, flag)  yield p's object pointer with the flag bit or:ed in */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
123
124/* glue functions (see the init function for details) */
125static PyObject* elementtree_copyelement_obj;
126static PyObject* elementtree_deepcopy_obj;
127static PyObject* elementtree_getiterator_obj;
128static PyObject* elementpath_obj;
129
130/* helpers */
131
132LOCAL(PyObject*)
133deepcopy(PyObject* object, PyObject* memo)
134{
135 /* do a deep copy of the given object */
136
137 PyObject* args;
138 PyObject* result;
139
140 if (!elementtree_deepcopy_obj) {
141 PyErr_SetString(
142 PyExc_RuntimeError,
143 "deepcopy helper not found"
144 );
145 return NULL;
146 }
147
148 args = PyTuple_New(2);
149 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
150 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
151
152 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
153
154 Py_DECREF(args);
155
156 return result;
157}
158
159LOCAL(PyObject*)
160list_join(PyObject* list)
161{
162 /* join list elements (destroying the list in the process) */
163
164 PyObject* joiner;
165 PyObject* function;
166 PyObject* args;
167 PyObject* result;
168
169 switch (PyList_GET_SIZE(list)) {
170 case 0:
171 Py_DECREF(list);
172 return PyString_FromString("");
173 case 1:
174 result = PyList_GET_ITEM(list, 0);
175 Py_INCREF(result);
176 Py_DECREF(list);
177 return result;
178 }
179
180 /* two or more elements: slice out a suitable separator from the
181 first member, and use that to join the entire list */
182
183 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
184 if (!joiner)
185 return NULL;
186
187 function = PyObject_GetAttrString(joiner, "join");
188 if (!function) {
189 Py_DECREF(joiner);
190 return NULL;
191 }
192
193 args = PyTuple_New(1);
194 PyTuple_SET_ITEM(args, 0, list);
195
196 result = PyObject_CallObject(function, args);
197
198 Py_DECREF(args); /* also removes list */
199 Py_DECREF(function);
200 Py_DECREF(joiner);
201
202 return result;
203}
204
205#if (PY_VERSION_HEX < 0x02020000)
206LOCAL(int)
207PyDict_Update(PyObject* dict, PyObject* other)
208{
209 /* PyDict_Update emulation for 2.1 and earlier */
210
211 PyObject* res;
212
213 res = PyObject_CallMethod(dict, "update", "O", other);
214 if (!res)
215 return -1;
216
217 Py_DECREF(res);
218 return 0;
219}
220#endif
221
222/* -------------------------------------------------------------------- */
223/* the element type */
224
225typedef struct {
226
227 /* attributes (a dictionary object), or None if no attributes */
228 PyObject* attrib;
229
230 /* child elements */
231 int length; /* actual number of items */
232 int allocated; /* allocated items */
233
234 /* this either points to _children or to a malloced buffer */
235 PyObject* *children;
236
237 PyObject* _children[STATIC_CHILDREN];
238
239} ElementObjectExtra;
240
241typedef struct {
242 PyObject_HEAD
243
244 /* element tag (a string). */
245 PyObject* tag;
246
247 /* text before first child. note that this is a tagged pointer;
248 use JOIN_OBJ to get the object pointer. the join flag is used
249 to distinguish lists created by the tree builder from lists
250 assigned to the attribute by application code; the former
251 should be joined before being returned to the user, the latter
252 should be left intact. */
253 PyObject* text;
254
255 /* text after this element, in parent. note that this is a tagged
256 pointer; use JOIN_OBJ to get the object pointer. */
257 PyObject* tail;
258
259 ElementObjectExtra* extra;
260
261} ElementObject;
262
263staticforward PyTypeObject Element_Type;
264
265#define Element_CheckExact(op) ((op)->ob_type == &Element_Type)
266
267/* -------------------------------------------------------------------- */
268/* element constructor and destructor */
269
270LOCAL(int)
271element_new_extra(ElementObject* self, PyObject* attrib)
272{
273 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
274 if (!self->extra)
275 return -1;
276
277 if (!attrib)
278 attrib = Py_None;
279
280 Py_INCREF(attrib);
281 self->extra->attrib = attrib;
282
283 self->extra->length = 0;
284 self->extra->allocated = STATIC_CHILDREN;
285 self->extra->children = self->extra->_children;
286
287 return 0;
288}
289
290LOCAL(void)
291element_dealloc_extra(ElementObject* self)
292{
293 int i;
294
295 Py_DECREF(self->extra->attrib);
296
297 for (i = 0; i < self->extra->length; i++)
298 Py_DECREF(self->extra->children[i]);
299
300 if (self->extra->children != self->extra->_children)
301 PyObject_Free(self->extra->children);
302
303 PyObject_Free(self->extra);
304}
305
306LOCAL(PyObject*)
307element_new(PyObject* tag, PyObject* attrib)
308{
309 ElementObject* self;
310
311 self = PyObject_New(ElementObject, &Element_Type);
312 if (self == NULL)
313 return NULL;
314
315 /* use None for empty dictionaries */
316 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
317 attrib = Py_None;
318
319 self->extra = NULL;
320
321 if (attrib != Py_None) {
322
323 if (element_new_extra(self, attrib) < 0)
324 return NULL;
325
326 self->extra->length = 0;
327 self->extra->allocated = STATIC_CHILDREN;
328 self->extra->children = self->extra->_children;
329
330 }
331
332 Py_INCREF(tag);
333 self->tag = tag;
334
335 Py_INCREF(Py_None);
336 self->text = Py_None;
337
338 Py_INCREF(Py_None);
339 self->tail = Py_None;
340
341 ALLOC(sizeof(ElementObject), "create element");
342
343 return (PyObject*) self;
344}
345
346LOCAL(int)
347element_resize(ElementObject* self, int extra)
348{
349 int size;
350 PyObject* *children;
351
352 /* make sure self->children can hold the given number of extra
353 elements. set an exception and return -1 if allocation failed */
354
355 if (!self->extra)
356 element_new_extra(self, NULL);
357
358 size = self->extra->length + extra;
359
360 if (size > self->extra->allocated) {
361 /* use Python 2.4's list growth strategy */
362 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
363 if (self->extra->children != self->extra->_children) {
364 children = PyObject_Realloc(self->extra->children,
365 size * sizeof(PyObject*));
366 if (!children)
367 goto nomemory;
368 } else {
369 children = PyObject_Malloc(size * sizeof(PyObject*));
370 if (!children)
371 goto nomemory;
372 /* copy existing children from static area to malloc buffer */
373 memcpy(children, self->extra->children,
374 self->extra->length * sizeof(PyObject*));
375 }
376 self->extra->children = children;
377 self->extra->allocated = size;
378 }
379
380 return 0;
381
382 nomemory:
383 PyErr_NoMemory();
384 return -1;
385}
386
387LOCAL(int)
388element_add_subelement(ElementObject* self, PyObject* element)
389{
390 /* add a child element to a parent */
391
392 if (element_resize(self, 1) < 0)
393 return -1;
394
395 Py_INCREF(element);
396 self->extra->children[self->extra->length] = element;
397
398 self->extra->length++;
399
400 return 0;
401}
402
403LOCAL(PyObject*)
404element_get_attrib(ElementObject* self)
405{
406 /* return borrowed reference to attrib dictionary */
407 /* note: this function assumes that the extra section exists */
408
409 PyObject* res = self->extra->attrib;
410
411 if (res == Py_None) {
412 /* create missing dictionary */
413 res = PyDict_New();
414 if (!res)
415 return NULL;
416 self->extra->attrib = res;
417 }
418
419 return res;
420}
421
422LOCAL(PyObject*)
423element_get_text(ElementObject* self)
424{
425 /* return borrowed reference to text attribute */
426
427 PyObject* res = self->text;
428
429 if (JOIN_GET(res)) {
430 res = JOIN_OBJ(res);
431 if (PyList_CheckExact(res)) {
432 res = list_join(res);
433 if (!res)
434 return NULL;
435 self->text = res;
436 }
437 }
438
439 return res;
440}
441
442LOCAL(PyObject*)
443element_get_tail(ElementObject* self)
444{
445 /* return borrowed reference to text attribute */
446
447 PyObject* res = self->tail;
448
449 if (JOIN_GET(res)) {
450 res = JOIN_OBJ(res);
451 if (PyList_CheckExact(res)) {
452 res = list_join(res);
453 if (!res)
454 return NULL;
455 self->tail = res;
456 }
457 }
458
459 return res;
460}
461
462static PyObject*
463element(PyObject* self, PyObject* args, PyObject* kw)
464{
465 PyObject* elem;
466
467 PyObject* tag;
468 PyObject* attrib = NULL;
469 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
470 &PyDict_Type, &attrib))
471 return NULL;
472
473 if (attrib || kw) {
474 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
475 if (!attrib)
476 return NULL;
477 if (kw)
478 PyDict_Update(attrib, kw);
479 } else {
480 Py_INCREF(Py_None);
481 attrib = Py_None;
482 }
483
484 elem = element_new(tag, attrib);
485
486 Py_DECREF(attrib);
487
488 return elem;
489}
490
491static PyObject*
492subelement(PyObject* self, PyObject* args, PyObject* kw)
493{
494 PyObject* elem;
495
496 ElementObject* parent;
497 PyObject* tag;
498 PyObject* attrib = NULL;
499 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
500 &Element_Type, &parent, &tag,
501 &PyDict_Type, &attrib))
502 return NULL;
503
504 if (attrib || kw) {
505 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
506 if (!attrib)
507 return NULL;
508 if (kw)
509 PyDict_Update(attrib, kw);
510 } else {
511 Py_INCREF(Py_None);
512 attrib = Py_None;
513 }
514
515 elem = element_new(tag, attrib);
516
517 Py_DECREF(attrib);
518
519 if (element_add_subelement(parent, elem) < 0)
520 return NULL;
521
522 return elem;
523}
524
525static void
526element_dealloc(ElementObject* self)
527{
528 if (self->extra)
529 element_dealloc_extra(self);
530
531 /* discard attributes */
532 Py_DECREF(self->tag);
533 Py_DECREF(JOIN_OBJ(self->text));
534 Py_DECREF(JOIN_OBJ(self->tail));
535
536 RELEASE(sizeof(ElementObject), "destroy element");
537
538 PyObject_Del(self);
539}
540
541/* -------------------------------------------------------------------- */
542/* methods (in alphabetical order) */
543
544static PyObject*
545element_append(ElementObject* self, PyObject* args)
546{
547 PyObject* element;
548 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
549 return NULL;
550
551 if (element_add_subelement(self, element) < 0)
552 return NULL;
553
554 Py_RETURN_NONE;
555}
556
557static PyObject*
558element_clear(ElementObject* self, PyObject* args)
559{
560 if (!PyArg_ParseTuple(args, ":clear"))
561 return NULL;
562
563 if (self->extra) {
564 element_dealloc_extra(self);
565 self->extra = NULL;
566 }
567
568 Py_INCREF(Py_None);
569 Py_DECREF(JOIN_OBJ(self->text));
570 self->text = Py_None;
571
572 Py_INCREF(Py_None);
573 Py_DECREF(JOIN_OBJ(self->tail));
574 self->tail = Py_None;
575
576 Py_RETURN_NONE;
577}
578
579static PyObject*
580element_copy(ElementObject* self, PyObject* args)
581{
582 int i;
583 ElementObject* element;
584
585 if (!PyArg_ParseTuple(args, ":__copy__"))
586 return NULL;
587
588 element = (ElementObject*) element_new(
589 self->tag, (self->extra) ? self->extra->attrib : Py_None
590 );
591 if (!element)
592 return NULL;
593
594 Py_DECREF(JOIN_OBJ(element->text));
595 element->text = self->text;
596 Py_INCREF(JOIN_OBJ(element->text));
597
598 Py_DECREF(JOIN_OBJ(element->tail));
599 element->tail = self->tail;
600 Py_INCREF(JOIN_OBJ(element->tail));
601
602 if (self->extra) {
603
604 if (element_resize(element, self->extra->length) < 0)
605 return NULL;
606
607 for (i = 0; i < self->extra->length; i++) {
608 Py_INCREF(self->extra->children[i]);
609 element->extra->children[i] = self->extra->children[i];
610 }
611
612 element->extra->length = self->extra->length;
613
614 }
615
616 return (PyObject*) element;
617}
618
619static PyObject*
620element_deepcopy(ElementObject* self, PyObject* args)
621{
622 int i;
623 ElementObject* element;
624 PyObject* tag;
625 PyObject* attrib;
626 PyObject* text;
627 PyObject* tail;
628 PyObject* id;
629
630 PyObject* memo;
631 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
632 return NULL;
633
634 tag = deepcopy(self->tag, memo);
635 if (!tag)
636 return NULL;
637
638 if (self->extra) {
639 attrib = deepcopy(self->extra->attrib, memo);
640 if (!attrib) {
641 Py_DECREF(tag);
642 return NULL;
643 }
644 } else {
645 Py_INCREF(Py_None);
646 attrib = Py_None;
647 }
648
649 element = (ElementObject*) element_new(tag, attrib);
650
651 Py_DECREF(tag);
652 Py_DECREF(attrib);
653
654 if (!element)
655 return NULL;
656
657 text = deepcopy(JOIN_OBJ(self->text), memo);
658 if (!text)
659 goto error;
660 Py_DECREF(element->text);
661 element->text = JOIN_SET(text, JOIN_GET(self->text));
662
663 tail = deepcopy(JOIN_OBJ(self->tail), memo);
664 if (!tail)
665 goto error;
666 Py_DECREF(element->tail);
667 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
668
669 if (self->extra) {
670
671 if (element_resize(element, self->extra->length) < 0)
672 goto error;
673
674 for (i = 0; i < self->extra->length; i++) {
675 PyObject* child = deepcopy(self->extra->children[i], memo);
676 if (!child) {
677 element->extra->length = i;
678 goto error;
679 }
680 element->extra->children[i] = child;
681 }
682
683 element->extra->length = self->extra->length;
684
685 }
686
687 /* add object to memo dictionary (so deepcopy won't visit it again) */
688 id = PyInt_FromLong((Py_uintptr_t) self);
689
690 i = PyDict_SetItem(memo, id, (PyObject*) element);
691
692 Py_DECREF(id);
693
694 if (i < 0)
695 goto error;
696
697 return (PyObject*) element;
698
699 error:
700 Py_DECREF(element);
701 return NULL;
702}
703
704LOCAL(int)
705checkpath(PyObject* tag)
706{
707 int i, check = 1;
708
709 /* check if a tag contains an xpath character */
710
711#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
712
713#if defined(Py_USING_UNICODE)
714 if (PyUnicode_Check(tag)) {
715 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
716 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
717 if (p[i] == '{')
718 check = 0;
719 else if (p[i] == '}')
720 check = 1;
721 else if (check && PATHCHAR(p[i]))
722 return 1;
723 }
724 return 0;
725 }
726#endif
727 if (PyString_Check(tag)) {
728 char *p = PyString_AS_STRING(tag);
729 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
730 if (p[i] == '{')
731 check = 0;
732 else if (p[i] == '}')
733 check = 1;
734 else if (check && PATHCHAR(p[i]))
735 return 1;
736 }
737 return 0;
738 }
739
740 return 1; /* unknown type; might be path expression */
741}
742
743static PyObject*
744element_find(ElementObject* self, PyObject* args)
745{
746 int i;
747
748 PyObject* tag;
749 if (!PyArg_ParseTuple(args, "O:find", &tag))
750 return NULL;
751
752 if (checkpath(tag))
753 return PyObject_CallMethod(
754 elementpath_obj, "find", "OO", self, tag
755 );
756
757 if (!self->extra)
758 Py_RETURN_NONE;
759
760 for (i = 0; i < self->extra->length; i++) {
761 PyObject* item = self->extra->children[i];
762 if (Element_CheckExact(item) &&
763 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
764 Py_INCREF(item);
765 return item;
766 }
767 }
768
769 Py_RETURN_NONE;
770}
771
772static PyObject*
773element_findtext(ElementObject* self, PyObject* args)
774{
775 int i;
776
777 PyObject* tag;
778 PyObject* default_value = Py_None;
779 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
780 return NULL;
781
782 if (checkpath(tag))
783 return PyObject_CallMethod(
784 elementpath_obj, "findtext", "OOO", self, tag, default_value
785 );
786
787 if (!self->extra) {
788 Py_INCREF(default_value);
789 return default_value;
790 }
791
792 for (i = 0; i < self->extra->length; i++) {
793 ElementObject* item = (ElementObject*) self->extra->children[i];
794 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
795 PyObject* text = element_get_text(item);
796 if (text == Py_None)
797 return PyString_FromString("");
798 Py_INCREF(text);
799 return text;
800 }
801 }
802
803 Py_INCREF(default_value);
804 return default_value;
805}
806
807static PyObject*
808element_findall(ElementObject* self, PyObject* args)
809{
810 int i;
811 PyObject* out;
812
813 PyObject* tag;
814 if (!PyArg_ParseTuple(args, "O:findall", &tag))
815 return NULL;
816
817 if (checkpath(tag))
818 return PyObject_CallMethod(
819 elementpath_obj, "findall", "OO", self, tag
820 );
821
822 out = PyList_New(0);
823 if (!out)
824 return NULL;
825
826 if (!self->extra)
827 return out;
828
829 for (i = 0; i < self->extra->length; i++) {
830 PyObject* item = self->extra->children[i];
831 if (Element_CheckExact(item) &&
832 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
833 if (PyList_Append(out, item) < 0) {
834 Py_DECREF(out);
835 return NULL;
836 }
837 }
838 }
839
840 return out;
841}
842
843static PyObject*
844element_get(ElementObject* self, PyObject* args)
845{
846 PyObject* value;
847
848 PyObject* key;
849 PyObject* default_value = Py_None;
850 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
851 return NULL;
852
853 if (!self->extra || self->extra->attrib == Py_None)
854 value = default_value;
855 else {
856 value = PyDict_GetItem(self->extra->attrib, key);
857 if (!value)
858 value = default_value;
859 }
860
861 Py_INCREF(value);
862 return value;
863}
864
865static PyObject*
866element_getchildren(ElementObject* self, PyObject* args)
867{
868 int i;
869 PyObject* list;
870
871 if (!PyArg_ParseTuple(args, ":getchildren"))
872 return NULL;
873
874 if (!self->extra)
875 return PyList_New(0);
876
877 list = PyList_New(self->extra->length);
878 if (!list)
879 return NULL;
880
881 for (i = 0; i < self->extra->length; i++) {
882 PyObject* item = self->extra->children[i];
883 Py_INCREF(item);
884 PyList_SET_ITEM(list, i, item);
885 }
886
887 return list;
888}
889
890static PyObject*
891element_getiterator(ElementObject* self, PyObject* args)
892{
893 PyObject* result;
894
895 PyObject* tag = Py_None;
896 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
897 return NULL;
898
899 if (!elementtree_getiterator_obj) {
900 PyErr_SetString(
901 PyExc_RuntimeError,
902 "getiterator helper not found"
903 );
904 return NULL;
905 }
906
907 args = PyTuple_New(2);
908 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
909 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
910
911 result = PyObject_CallObject(elementtree_getiterator_obj, args);
912
913 Py_DECREF(args);
914
915 return result;
916}
917
918static PyObject*
919element_getitem(ElementObject* self, int index)
920{
921 if (!self->extra || index < 0 || index >= self->extra->length) {
922 PyErr_SetString(
923 PyExc_IndexError,
924 "child index out of range"
925 );
926 return NULL;
927 }
928
929 Py_INCREF(self->extra->children[index]);
930 return self->extra->children[index];
931}
932
933static PyObject*
934element_getslice(ElementObject* self, int start, int end)
935{
936 int i;
937 PyObject* list;
938
939 if (!self->extra)
940 return PyList_New(0);
941
942 /* standard clamping */
943 if (start < 0)
944 start = 0;
945 if (end < 0)
946 end = 0;
947 if (end > self->extra->length)
948 end = self->extra->length;
949 if (start > end)
950 start = end;
951
952 list = PyList_New(end - start);
953 if (!list)
954 return NULL;
955
956 for (i = start; i < end; i++) {
957 PyObject* item = self->extra->children[i];
958 Py_INCREF(item);
959 PyList_SET_ITEM(list, i - start, item);
960 }
961
962 return list;
963}
964
965static PyObject*
966element_insert(ElementObject* self, PyObject* args)
967{
968 int i;
969
970 int index;
971 PyObject* element;
972 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
973 &Element_Type, &element))
974 return NULL;
975
976 if (!self->extra)
977 element_new_extra(self, NULL);
978
979 if (index < 0)
980 index = 0;
981 if (index > self->extra->length)
982 index = self->extra->length;
983
984 if (element_resize(self, 1) < 0)
985 return NULL;
986
987 for (i = self->extra->length; i > index; i--)
988 self->extra->children[i] = self->extra->children[i-1];
989
990 Py_INCREF(element);
991 self->extra->children[index] = element;
992
993 self->extra->length++;
994
995 Py_RETURN_NONE;
996}
997
998static PyObject*
999element_items(ElementObject* self, PyObject* args)
1000{
1001 if (!PyArg_ParseTuple(args, ":items"))
1002 return NULL;
1003
1004 if (!self->extra || self->extra->attrib == Py_None)
1005 return PyList_New(0);
1006
1007 return PyDict_Items(self->extra->attrib);
1008}
1009
1010static PyObject*
1011element_keys(ElementObject* self, PyObject* args)
1012{
1013 if (!PyArg_ParseTuple(args, ":keys"))
1014 return NULL;
1015
1016 if (!self->extra || self->extra->attrib == Py_None)
1017 return PyList_New(0);
1018
1019 return PyDict_Keys(self->extra->attrib);
1020}
1021
1022static int
1023element_length(ElementObject* self)
1024{
1025 if (!self->extra)
1026 return 0;
1027
1028 return self->extra->length;
1029}
1030
1031static PyObject*
1032element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1033{
1034 PyObject* elem;
1035
1036 PyObject* tag;
1037 PyObject* attrib;
1038 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1039 return NULL;
1040
1041 attrib = PyDict_Copy(attrib);
1042 if (!attrib)
1043 return NULL;
1044
1045 elem = element_new(tag, attrib);
1046
1047 Py_DECREF(attrib);
1048
1049 return elem;
1050}
1051
1052static PyObject*
1053element_reduce(ElementObject* self, PyObject* args)
1054{
1055 if (!PyArg_ParseTuple(args, ":__reduce__"))
1056 return NULL;
1057
1058 /* Hack alert: This method is used to work around a __copy__
1059 problem on certain 2.3 and 2.4 versions. To save time and
1060 simplify the code, we create the copy in here, and use a dummy
1061 copyelement helper to trick the copy module into doing the
1062 right thing. */
1063
1064 if (!elementtree_copyelement_obj) {
1065 PyErr_SetString(
1066 PyExc_RuntimeError,
1067 "copyelement helper not found"
1068 );
1069 return NULL;
1070 }
1071
1072 return Py_BuildValue(
1073 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1074 );
1075}
1076
1077static PyObject*
1078element_remove(ElementObject* self, PyObject* args)
1079{
1080 int i;
1081
1082 PyObject* element;
1083 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1084 return NULL;
1085
1086 if (!self->extra) {
1087 /* element has no children, so raise exception */
1088 PyErr_SetString(
1089 PyExc_ValueError,
1090 "list.remove(x): x not in list"
1091 );
1092 return NULL;
1093 }
1094
1095 for (i = 0; i < self->extra->length; i++) {
1096 if (self->extra->children[i] == element)
1097 break;
1098 if (PyObject_Compare(self->extra->children[i], element) == 0)
1099 break;
1100 }
1101
1102 if (i == self->extra->length) {
1103 /* element is not in children, so raise exception */
1104 PyErr_SetString(
1105 PyExc_ValueError,
1106 "list.remove(x): x not in list"
1107 );
1108 return NULL;
1109 }
1110
1111 Py_DECREF(self->extra->children[i]);
1112
1113 self->extra->length--;
1114
1115 for (; i < self->extra->length; i++)
1116 self->extra->children[i] = self->extra->children[i+1];
1117
1118 Py_RETURN_NONE;
1119}
1120
1121static PyObject*
1122element_repr(ElementObject* self)
1123{
1124 PyObject* repr;
1125 char buffer[100];
1126
1127 repr = PyString_FromString("<Element ");
1128
1129 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag));
1130
1131 sprintf(buffer, " at %p>", self);
1132 PyString_ConcatAndDel(&repr, PyString_FromString(buffer));
1133
1134 return repr;
1135}
1136
1137static PyObject*
1138element_set(ElementObject* self, PyObject* args)
1139{
1140 PyObject* attrib;
1141
1142 PyObject* key;
1143 PyObject* value;
1144 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1145 return NULL;
1146
1147 if (!self->extra)
1148 element_new_extra(self, NULL);
1149
1150 attrib = element_get_attrib(self);
1151 if (!attrib)
1152 return NULL;
1153
1154 if (PyDict_SetItem(attrib, key, value) < 0)
1155 return NULL;
1156
1157 Py_RETURN_NONE;
1158}
1159
1160static int
1161element_setslice(ElementObject* self, int start, int end, PyObject* item)
1162{
1163 int i, new, old;
1164 PyObject* recycle = NULL;
1165
1166 if (!self->extra)
1167 element_new_extra(self, NULL);
1168
1169 /* standard clamping */
1170 if (start < 0)
1171 start = 0;
1172 if (end < 0)
1173 end = 0;
1174 if (end > self->extra->length)
1175 end = self->extra->length;
1176 if (start > end)
1177 start = end;
1178
1179 old = end - start;
1180
1181 if (item == NULL)
1182 new = 0;
1183 else if (PyList_CheckExact(item)) {
1184 new = PyList_GET_SIZE(item);
1185 } else {
1186 /* FIXME: support arbitrary sequences? */
1187 PyErr_Format(
1188 PyExc_TypeError,
1189 "expected list, not \"%.200s\"", item->ob_type->tp_name
1190 );
1191 return -1;
1192 }
1193
1194 if (old > 0) {
1195 /* to avoid recursive calls to this method (via decref), move
1196 old items to the recycle bin here, and get rid of them when
1197 we're done modifying the element */
1198 recycle = PyList_New(old);
1199 for (i = 0; i < old; i++)
1200 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1201 }
1202
1203 if (new < old) {
1204 /* delete slice */
1205 for (i = end; i < self->extra->length; i++)
1206 self->extra->children[i + new - old] = self->extra->children[i];
1207 } else if (new > old) {
1208 /* insert slice */
1209 if (element_resize(self, new - old) < 0)
1210 return -1;
1211 for (i = self->extra->length-1; i >= end; i--)
1212 self->extra->children[i + new - old] = self->extra->children[i];
1213 }
1214
1215 /* replace the slice */
1216 for (i = 0; i < new; i++) {
1217 PyObject* element = PyList_GET_ITEM(item, i);
1218 Py_INCREF(element);
1219 self->extra->children[i + start] = element;
1220 }
1221
1222 self->extra->length += new - old;
1223
1224 /* discard the recycle bin, and everything in it */
1225 Py_XDECREF(recycle);
1226
1227 return 0;
1228}
1229
1230static int
1231element_setitem(ElementObject* self, int index, PyObject* item)
1232{
1233 int i;
1234 PyObject* old;
1235
1236 if (!self->extra || index < 0 || index >= self->extra->length) {
1237 PyErr_SetString(
1238 PyExc_IndexError,
1239 "child assignment index out of range");
1240 return -1;
1241 }
1242
1243 old = self->extra->children[index];
1244
1245 if (item) {
1246 Py_INCREF(item);
1247 self->extra->children[index] = item;
1248 } else {
1249 self->extra->length--;
1250 for (i = index; i < self->extra->length; i++)
1251 self->extra->children[i] = self->extra->children[i+1];
1252 }
1253
1254 Py_DECREF(old);
1255
1256 return 0;
1257}
1258
1259static PyMethodDef element_methods[] = {
1260
1261 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1262
1263 {"get", (PyCFunction) element_get, METH_VARARGS},
1264 {"set", (PyCFunction) element_set, METH_VARARGS},
1265
1266 {"find", (PyCFunction) element_find, METH_VARARGS},
1267 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1268 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1269
1270 {"append", (PyCFunction) element_append, METH_VARARGS},
1271 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1272 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1273
1274 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1275 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1276
1277 {"items", (PyCFunction) element_items, METH_VARARGS},
1278 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1279
1280 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1281
1282 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1283 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1284
1285 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1286 C objects correctly, so we have to fake it using a __reduce__-
1287 based hack (see the element_reduce implementation above for
1288 details). */
1289
1290 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1291 using a runtime test to figure out if we need to fake things
1292 or now (see the init code below). The following entry is
1293 enabled only if the hack is needed. */
1294
1295 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1296
1297 {NULL, NULL}
1298};
1299
1300static PyObject*
1301element_getattr(ElementObject* self, char* name)
1302{
1303 PyObject* res;
1304
1305 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1306 if (res)
1307 return res;
1308
1309 PyErr_Clear();
1310
1311 if (strcmp(name, "tag") == 0)
1312 res = self->tag;
1313 else if (strcmp(name, "text") == 0)
1314 res = element_get_text(self);
1315 else if (strcmp(name, "tail") == 0) {
1316 res = element_get_tail(self);
1317 } else if (strcmp(name, "attrib") == 0) {
1318 if (!self->extra)
1319 element_new_extra(self, NULL);
1320 res = element_get_attrib(self);
1321 } else {
1322 PyErr_SetString(PyExc_AttributeError, name);
1323 return NULL;
1324 }
1325
1326 if (!res)
1327 return NULL;
1328
1329 Py_INCREF(res);
1330 return res;
1331}
1332
1333static int
1334element_setattr(ElementObject* self, const char* name, PyObject* value)
1335{
1336 if (value == NULL) {
1337 PyErr_SetString(
1338 PyExc_AttributeError,
1339 "can't delete element attributes"
1340 );
1341 return -1;
1342 }
1343
1344 if (strcmp(name, "tag") == 0) {
1345 Py_DECREF(self->tag);
1346 self->tag = value;
1347 Py_INCREF(self->tag);
1348 } else if (strcmp(name, "text") == 0) {
1349 Py_DECREF(JOIN_OBJ(self->text));
1350 self->text = value;
1351 Py_INCREF(self->text);
1352 } else if (strcmp(name, "tail") == 0) {
1353 Py_DECREF(JOIN_OBJ(self->tail));
1354 self->tail = value;
1355 Py_INCREF(self->tail);
1356 } else if (strcmp(name, "attrib") == 0) {
1357 if (!self->extra)
1358 element_new_extra(self, NULL);
1359 Py_DECREF(self->extra->attrib);
1360 self->extra->attrib = value;
1361 Py_INCREF(self->extra->attrib);
1362 } else {
1363 PyErr_SetString(PyExc_AttributeError, name);
1364 return -1;
1365 }
1366
1367 return 0;
1368}
1369
/* sequence protocol for Element objects: an element behaves as a
   sequence of its children.  slots not listed are left zero. */
static PySequenceMethods element_as_sequence = {
    (inquiry) element_length, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    (intargfunc) element_getitem, /* sq_item */
    (intintargfunc) element_getslice, /* sq_slice */
    (intobjargproc) element_setitem, /* sq_ass_item (also handles deletion) */
    (intintobjargproc) element_setslice, /* sq_ass_slice */
};
1379
/* type object for Element.  the type pointer is left NULL in the
   static initializer (presumably patched by the module init code --
   TODO confirm); remaining slots are zero. */
statichere PyTypeObject Element_Type = {
    PyObject_HEAD_INIT(NULL)
    0, "Element", sizeof(ElementObject), 0,
    /* methods */
    (destructor)element_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)element_getattr, /* tp_getattr */
    (setattrfunc)element_setattr, /* tp_setattr */
    0, /* tp_compare */
    (reprfunc)element_repr, /* tp_repr */
    0, /* tp_as_number */
    &element_as_sequence, /* tp_as_sequence */
};
1393
1394/* ==================================================================== */
1395/* the tree builder type */
1396
/* state of a tree builder: the tree under construction plus the
   optional event collection configured by XMLParser._setevents */
typedef struct {
    PyObject_HEAD

    PyObject* root; /* root node (first created node) */

    /* both start out as Py_None (see treebuilder_new) */
    ElementObject* this; /* current node */
    ElementObject* last; /* most recently created node */

    PyObject* data; /* data collector (string or list), or NULL */

    PyObject* stack; /* element stack */
    int index; /* current stack size (0=empty) */

    /* element tracing */
    PyObject* events; /* list of events, or NULL if not collecting */
    PyObject* start_event_obj; /* event objects (NULL to ignore) */
    PyObject* end_event_obj;
    PyObject* start_ns_event_obj;
    PyObject* end_ns_event_obj;

} TreeBuilderObject;

staticforward PyTypeObject TreeBuilder_Type;

/* true only for exact TreeBuilder instances; used to pick the direct
   C shortcuts instead of calling the target's methods */
#define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type)
1422
1423/* -------------------------------------------------------------------- */
1424/* constructor and destructor */
1425
1426LOCAL(PyObject*)
1427treebuilder_new(void)
1428{
1429 TreeBuilderObject* self;
1430
1431 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1432 if (self == NULL)
1433 return NULL;
1434
1435 self->root = NULL;
1436
1437 Py_INCREF(Py_None);
1438 self->this = (ElementObject*) Py_None;
1439
1440 Py_INCREF(Py_None);
1441 self->last = (ElementObject*) Py_None;
1442
1443 self->data = NULL;
1444
1445 self->stack = PyList_New(20);
1446 self->index = 0;
1447
1448 self->events = NULL;
1449 self->start_event_obj = self->end_event_obj = NULL;
1450 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1451
1452 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1453
1454 return (PyObject*) self;
1455}
1456
1457static PyObject*
1458treebuilder(PyObject* _self, PyObject* args)
1459{
1460 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1461 return NULL;
1462
1463 return treebuilder_new();
1464}
1465
/* destructor: release every reference held by the builder, then free
   the object itself */
static void
treebuilder_dealloc(TreeBuilderObject* self)
{
    /* the event objects, events, data and root may be NULL */
    Py_XDECREF(self->end_ns_event_obj);
    Py_XDECREF(self->start_ns_event_obj);
    Py_XDECREF(self->end_event_obj);
    Py_XDECREF(self->start_event_obj);
    Py_XDECREF(self->events);
    /* stack, last and this are released with Py_DECREF, so they must
       be non-NULL here */
    Py_DECREF(self->stack);
    Py_XDECREF(self->data);
    Py_DECREF(self->last);
    Py_DECREF(self->this);
    Py_XDECREF(self->root);

    RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");

    PyObject_Del(self);
}
1484
1485/* -------------------------------------------------------------------- */
1486/* handlers */
1487
1488LOCAL(PyObject*)
1489treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1490 PyObject* standalone)
1491{
1492 Py_RETURN_NONE;
1493}
1494
1495LOCAL(PyObject*)
1496treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1497 PyObject* attrib)
1498{
1499 PyObject* node;
1500 PyObject* this;
1501
1502 if (self->data) {
1503 if (self->this == self->last) {
Fredrik Lundh0149e3a2005-12-18 13:58:25 +00001504 Py_DECREF(self->last->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001505 self->last->text = JOIN_SET(
1506 self->data, PyList_CheckExact(self->data)
1507 );
1508 } else {
Fredrik Lundh0149e3a2005-12-18 13:58:25 +00001509 Py_DECREF(self->last->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001510 self->last->tail = JOIN_SET(
1511 self->data, PyList_CheckExact(self->data)
1512 );
1513 }
1514 self->data = NULL;
1515 }
1516
1517 node = element_new(tag, attrib);
1518 if (!node)
1519 return NULL;
1520
1521 this = (PyObject*) self->this;
1522
1523 if (this != Py_None) {
1524 if (element_add_subelement((ElementObject*) this, node) < 0)
1525 return NULL;
1526 } else {
1527 if (self->root) {
1528 PyErr_SetString(
1529 PyExc_SyntaxError,
1530 "multiple elements on top level"
1531 );
1532 return NULL;
1533 }
1534 Py_INCREF(node);
1535 self->root = node;
1536 }
1537
1538 if (self->index < PyList_GET_SIZE(self->stack)) {
1539 if (PyList_SetItem(self->stack, self->index, this) < 0)
1540 return NULL;
1541 Py_INCREF(this);
1542 } else {
1543 if (PyList_Append(self->stack, this) < 0)
1544 return NULL;
1545 }
1546 self->index++;
1547
1548 Py_DECREF(this);
1549 Py_INCREF(node);
1550 self->this = (ElementObject*) node;
1551
1552 Py_DECREF(self->last);
1553 Py_INCREF(node);
1554 self->last = (ElementObject*) node;
1555
1556 if (self->start_event_obj) {
1557 PyObject* res;
1558 PyObject* action = self->start_event_obj;
1559 res = PyTuple_New(2);
1560 if (res) {
1561 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1562 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1563 PyList_Append(self->events, res);
1564 Py_DECREF(res);
1565 } else
1566 PyErr_Clear(); /* FIXME: propagate error */
1567 }
1568
1569 return node;
1570}
1571
1572LOCAL(PyObject*)
1573treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1574{
1575 if (!self->data) {
1576 /* store the first item as is */
1577 Py_INCREF(data); self->data = data;
1578 } else {
1579 /* more than one item; use a list to collect items */
1580 if (PyString_CheckExact(self->data) && self->data->ob_refcnt == 1 &&
1581 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1582 /* expat often generates single character data sections; handle
1583 the most common case by resizing the existing string... */
1584 int size = PyString_GET_SIZE(self->data);
1585 if (_PyString_Resize(&self->data, size + 1) < 0)
1586 return NULL;
1587 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1588 } else if (PyList_CheckExact(self->data)) {
1589 if (PyList_Append(self->data, data) < 0)
1590 return NULL;
1591 } else {
1592 PyObject* list = PyList_New(2);
1593 if (!list)
1594 return NULL;
1595 PyList_SET_ITEM(list, 0, self->data);
1596 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1597 self->data = list;
1598 }
1599 }
1600
1601 Py_RETURN_NONE;
1602}
1603
1604LOCAL(PyObject*)
1605treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1606{
1607 PyObject* item;
1608
1609 if (self->data) {
1610 if (self->this == self->last) {
Fredrik Lundh0149e3a2005-12-18 13:58:25 +00001611 Py_DECREF(self->last->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001612 self->last->text = JOIN_SET(
1613 self->data, PyList_CheckExact(self->data)
1614 );
1615 } else {
Fredrik Lundh0149e3a2005-12-18 13:58:25 +00001616 Py_DECREF(self->last->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001617 self->last->tail = JOIN_SET(
1618 self->data, PyList_CheckExact(self->data)
1619 );
1620 }
1621 self->data = NULL;
1622 }
1623
1624 if (self->index == 0) {
1625 PyErr_SetString(
1626 PyExc_IndexError,
1627 "pop from empty stack"
1628 );
1629 return NULL;
1630 }
1631
1632 self->index--;
1633
1634 item = PyList_GET_ITEM(self->stack, self->index);
1635 Py_INCREF(item);
1636
1637 Py_DECREF(self->last);
1638
1639 self->last = (ElementObject*) self->this;
1640 self->this = (ElementObject*) item;
1641
1642 if (self->end_event_obj) {
1643 PyObject* res;
1644 PyObject* action = self->end_event_obj;
1645 PyObject* node = (PyObject*) self->last;
1646 res = PyTuple_New(2);
1647 if (res) {
1648 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1649 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1650 PyList_Append(self->events, res);
1651 Py_DECREF(res);
1652 } else
1653 PyErr_Clear(); /* FIXME: propagate error */
1654 }
1655
1656 Py_INCREF(self->last);
1657 return (PyObject*) self->last;
1658}
1659
1660LOCAL(void)
1661treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1662 const char* prefix, const char *uri)
1663{
1664 PyObject* res;
1665 PyObject* action;
1666 PyObject* parcel;
1667
1668 if (!self->events)
1669 return;
1670
1671 if (start) {
1672 if (!self->start_ns_event_obj)
1673 return;
1674 action = self->start_ns_event_obj;
1675 /* FIXME: prefix and uri use utf-8 encoding! */
1676 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1677 if (!parcel)
1678 return;
1679 Py_INCREF(action);
1680 } else {
1681 if (!self->end_ns_event_obj)
1682 return;
1683 action = self->end_ns_event_obj;
1684 Py_INCREF(action);
1685 parcel = Py_None;
1686 Py_INCREF(parcel);
1687 }
1688
1689 res = PyTuple_New(2);
1690
1691 if (res) {
1692 PyTuple_SET_ITEM(res, 0, action);
1693 PyTuple_SET_ITEM(res, 1, parcel);
1694 PyList_Append(self->events, res);
1695 Py_DECREF(res);
1696 } else
1697 PyErr_Clear(); /* FIXME: propagate error */
1698}
1699
1700/* -------------------------------------------------------------------- */
1701/* methods (in alphabetical order) */
1702
1703static PyObject*
1704treebuilder_data(TreeBuilderObject* self, PyObject* args)
1705{
1706 PyObject* data;
1707 if (!PyArg_ParseTuple(args, "O:data", &data))
1708 return NULL;
1709
1710 return treebuilder_handle_data(self, data);
1711}
1712
1713static PyObject*
1714treebuilder_end(TreeBuilderObject* self, PyObject* args)
1715{
1716 PyObject* tag;
1717 if (!PyArg_ParseTuple(args, "O:end", &tag))
1718 return NULL;
1719
1720 return treebuilder_handle_end(self, tag);
1721}
1722
1723LOCAL(PyObject*)
1724treebuilder_done(TreeBuilderObject* self)
1725{
1726 PyObject* res;
1727
1728 /* FIXME: check stack size? */
1729
1730 if (self->root)
1731 res = self->root;
1732 else
1733 res = Py_None;
1734
1735 Py_INCREF(res);
1736 return res;
1737}
1738
1739static PyObject*
1740treebuilder_close(TreeBuilderObject* self, PyObject* args)
1741{
1742 if (!PyArg_ParseTuple(args, ":close"))
1743 return NULL;
1744
1745 return treebuilder_done(self);
1746}
1747
1748static PyObject*
1749treebuilder_start(TreeBuilderObject* self, PyObject* args)
1750{
1751 PyObject* tag;
1752 PyObject* attrib = Py_None;
1753 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1754 return NULL;
1755
1756 return treebuilder_handle_start(self, tag, attrib);
1757}
1758
1759static PyObject*
1760treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1761{
1762 PyObject* encoding;
1763 PyObject* standalone;
1764 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1765 return NULL;
1766
1767 return treebuilder_handle_xml(self, encoding, standalone);
1768}
1769
1770static PyMethodDef treebuilder_methods[] = {
1771 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1772 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1773 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1774 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1775 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1776 {NULL, NULL}
1777};
1778
1779static PyObject*
1780treebuilder_getattr(TreeBuilderObject* self, char* name)
1781{
1782 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1783}
1784
/* type object for TreeBuilder.  only deallocation and attribute
   lookup are provided; remaining slots are zero. */
statichere PyTypeObject TreeBuilder_Type = {
    PyObject_HEAD_INIT(NULL)
    0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
    /* methods */
    (destructor)treebuilder_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)treebuilder_getattr, /* tp_getattr */
};
1793
1794/* ==================================================================== */
1795/* the expat interface */
1796
1797#if defined(USE_EXPAT)
1798
1799#include "expat.h"
1800
/* EXPAT(func) names an expat entry point: a member of the pyexpat
   dispatch table when USE_PYEXPAT_CAPI is defined, otherwise the
   XML_-prefixed library symbol itself */
#if defined(USE_PYEXPAT_CAPI)
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
1808
/* state of an XMLParser object */
typedef struct {
    PyObject_HEAD

    XML_Parser parser; /* the underlying expat parser */

    PyObject* target; /* object receiving the parse events */
    PyObject* entity; /* dictionary consulted for entity references */

    PyObject* names; /* dictionary caching raw name -> universal name */

    /* bound methods of the target, looked up at construction time;
       NULL if the target lacks the method */
    PyObject* handle_xml;
    PyObject* handle_start;
    PyObject* handle_data;
    PyObject* handle_end;

    PyObject* handle_comment;
    PyObject* handle_pi;

} XMLParserObject;

staticforward PyTypeObject XMLParser_Type;
1830
1831/* helpers */
1832
#if defined(Py_USING_UNICODE)
LOCAL(int)
checkstring(const char* string, int size)
{
    /* return 1 if any of the first size bytes has its high bit set
       (i.e. the 8-bit string contains UTF-8 characters), else 0 */

    while (size-- > 0) {
        if (*string++ & 0x80)
            return 1;
    }

    return 0;
}
#endif
1847
1848LOCAL(PyObject*)
1849makestring(const char* string, int size)
1850{
1851 /* convert a UTF-8 string to either a 7-bit ascii string or a
1852 Unicode string */
1853
1854#if defined(Py_USING_UNICODE)
1855 if (checkstring(string, size))
1856 return PyUnicode_DecodeUTF8(string, size, "strict");
1857#endif
1858
1859 return PyString_FromStringAndSize(string, size);
1860}
1861
1862LOCAL(PyObject*)
1863makeuniversal(XMLParserObject* self, const char* string)
1864{
1865 /* convert a UTF-8 tag/attribute name from the expat parser
1866 to a universal name string */
1867
1868 int size = strlen(string);
1869 PyObject* key;
1870 PyObject* value;
1871
1872 /* look the 'raw' name up in the names dictionary */
1873 key = PyString_FromStringAndSize(string, size);
1874 if (!key)
1875 return NULL;
1876
1877 value = PyDict_GetItem(self->names, key);
1878
1879 if (value) {
1880 Py_INCREF(value);
1881 } else {
1882 /* new name. convert to universal name, and decode as
1883 necessary */
1884
1885 PyObject* tag;
1886 char* p;
1887 int i;
1888
1889 /* look for namespace separator */
1890 for (i = 0; i < size; i++)
1891 if (string[i] == '}')
1892 break;
1893 if (i != size) {
1894 /* convert to universal name */
1895 tag = PyString_FromStringAndSize(NULL, size+1);
1896 p = PyString_AS_STRING(tag);
1897 p[0] = '{';
1898 memcpy(p+1, string, size);
1899 size++;
1900 } else {
1901 /* plain name; use key as tag */
1902 Py_INCREF(key);
1903 tag = key;
1904 }
1905
1906 /* decode universal name */
1907#if defined(Py_USING_UNICODE)
1908 /* inline makestring, to avoid duplicating the source string if
1909 it's not an utf-8 string */
1910 p = PyString_AS_STRING(tag);
1911 if (checkstring(p, size)) {
1912 value = PyUnicode_DecodeUTF8(p, size, "strict");
1913 Py_DECREF(tag);
1914 if (!value) {
1915 Py_DECREF(key);
1916 return NULL;
1917 }
1918 } else
1919#endif
1920 value = tag; /* use tag as is */
1921
1922 /* add to names dictionary */
1923 if (PyDict_SetItem(self->names, key, value) < 0) {
1924 Py_DECREF(key);
1925 Py_DECREF(value);
1926 return NULL;
1927 }
1928 }
1929
1930 Py_DECREF(key);
1931 return value;
1932}
1933
1934/* -------------------------------------------------------------------- */
1935/* handlers */
1936
1937static void
1938expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1939 int data_len)
1940{
1941 PyObject* key;
1942 PyObject* value;
1943 PyObject* res;
1944
1945 if (data_len < 2 || data_in[0] != '&')
1946 return;
1947
1948 key = makestring(data_in + 1, data_len - 2);
1949 if (!key)
1950 return;
1951
1952 value = PyDict_GetItem(self->entity, key);
1953
1954 if (value) {
1955 if (TreeBuilder_CheckExact(self->target))
1956 res = treebuilder_handle_data(
1957 (TreeBuilderObject*) self->target, value
1958 );
1959 else if (self->handle_data)
1960 res = PyObject_CallFunction(self->handle_data, "O", value);
1961 else
1962 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001963 Py_XDECREF(res);
1964 } else {
1965 PyErr_Format(
1966 PyExc_SyntaxError, "undefined entity &%s;: line %d, column %d",
1967 PyString_AS_STRING(key),
1968 EXPAT(GetErrorLineNumber)(self->parser),
1969 EXPAT(GetErrorColumnNumber)(self->parser)
1970 );
1971 }
1972
1973 Py_DECREF(key);
1974}
1975
1976static void
1977expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
1978 const XML_Char **attrib_in)
1979{
1980 PyObject* res;
1981 PyObject* tag;
1982 PyObject* attrib;
1983 int ok;
1984
1985 /* tag name */
1986 tag = makeuniversal(self, tag_in);
1987 if (!tag)
1988 return; /* parser will look for errors */
1989
1990 /* attributes */
1991 if (attrib_in[0]) {
1992 attrib = PyDict_New();
1993 if (!attrib)
1994 return;
1995 while (attrib_in[0] && attrib_in[1]) {
1996 PyObject* key = makeuniversal(self, attrib_in[0]);
1997 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
1998 if (!key || !value) {
1999 Py_XDECREF(value);
2000 Py_XDECREF(key);
2001 Py_DECREF(attrib);
2002 return;
2003 }
2004 ok = PyDict_SetItem(attrib, key, value);
2005 Py_DECREF(value);
2006 Py_DECREF(key);
2007 if (ok < 0) {
2008 Py_DECREF(attrib);
2009 return;
2010 }
2011 attrib_in += 2;
2012 }
2013 } else {
2014 Py_INCREF(Py_None);
2015 attrib = Py_None;
2016 }
2017
2018 if (TreeBuilder_CheckExact(self->target))
2019 /* shortcut */
2020 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2021 tag, attrib);
2022 else if (self->handle_start)
2023 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2024 else
2025 res = NULL;
2026
2027 Py_DECREF(tag);
2028 Py_DECREF(attrib);
2029
2030 Py_XDECREF(res);
2031}
2032
2033static void
2034expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2035 int data_len)
2036{
2037 PyObject* data;
2038 PyObject* res;
2039
2040 data = makestring(data_in, data_len);
2041
2042 if (TreeBuilder_CheckExact(self->target))
2043 /* shortcut */
2044 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2045 else if (self->handle_data)
2046 res = PyObject_CallFunction(self->handle_data, "O", data);
2047 else
2048 res = NULL;
2049
2050 Py_DECREF(data);
2051
2052 Py_XDECREF(res);
2053}
2054
2055static void
2056expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2057{
2058 PyObject* tag;
2059 PyObject* res = NULL;
2060
2061 if (TreeBuilder_CheckExact(self->target))
2062 /* shortcut */
2063 /* the standard tree builder doesn't look at the end tag */
2064 res = treebuilder_handle_end(
2065 (TreeBuilderObject*) self->target, Py_None
2066 );
2067 else if (self->handle_end) {
2068 tag = makeuniversal(self, tag_in);
2069 if (tag) {
2070 res = PyObject_CallFunction(self->handle_end, "O", tag);
2071 Py_DECREF(tag);
2072 }
2073 }
2074
2075 Py_XDECREF(res);
2076}
2077
2078static void
2079expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2080 const XML_Char *uri)
2081{
2082 treebuilder_handle_namespace(
2083 (TreeBuilderObject*) self->target, 1, prefix, uri
2084 );
2085}
2086
2087static void
2088expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2089{
2090 treebuilder_handle_namespace(
2091 (TreeBuilderObject*) self->target, 0, NULL, NULL
2092 );
2093}
2094
2095static void
2096expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2097{
2098 PyObject* comment;
2099 PyObject* res;
2100
2101 if (self->handle_comment) {
2102 comment = makestring(comment_in, strlen(comment_in));
2103 if (comment) {
2104 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2105 Py_XDECREF(res);
2106 Py_DECREF(comment);
2107 }
2108 }
2109}
2110
2111static void
2112expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2113 const XML_Char* data_in)
2114{
2115 PyObject* target;
2116 PyObject* data;
2117 PyObject* res;
2118
2119 if (self->handle_pi) {
2120 target = makestring(target_in, strlen(target_in));
2121 data = makestring(data_in, strlen(data_in));
2122 if (target && data) {
2123 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2124 Py_XDECREF(res);
2125 Py_DECREF(data);
2126 Py_DECREF(target);
2127 } else {
2128 Py_XDECREF(data);
2129 Py_XDECREF(target);
2130 }
2131 }
2132}
2133
#if defined(Py_USING_UNICODE)
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    /* build a byte -> code point table for an encoding expat does not
       know, by decoding all 256 byte values with the python codec of
       that name.  bytes the codec cannot decode are mapped to -1.
       returns XML_STATUS_OK or XML_STATUS_ERROR. */

    unsigned char bytes[256];
    PyObject* decoded;
    Py_UNICODE* chars;
    int code;

    memset(info, 0, sizeof(XML_Encoding));

    for (code = 0; code < 256; code++)
        bytes[code] = code;

    decoded = PyUnicode_Decode(bytes, 256, name, "replace");
    if (decoded == NULL)
        return XML_STATUS_ERROR;

    /* only accept codecs that map each byte to exactly one character */
    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (code = 0; code < 256; code++) {
        if (chars[code] == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[code] = -1;
        else
            info->map[code] = chars[code];
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2172
2173/* -------------------------------------------------------------------- */
2174/* constructor and destructor */
2175
2176static PyObject*
2177xmlparser(PyObject* _self, PyObject* args, PyObject* kw)
2178{
2179 XMLParserObject* self;
2180 /* FIXME: does this need to be static? */
2181 static XML_Memory_Handling_Suite memory_handler;
2182
2183 PyObject* target = NULL;
2184 char* encoding = NULL;
2185 static PY_CONST char* kwlist[] = { "target", "encoding", NULL };
2186 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2187 &target, &encoding))
2188 return NULL;
2189
2190#if defined(USE_PYEXPAT_CAPI)
2191 if (!expat_capi) {
2192 PyErr_SetString(
2193 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2194 );
2195 return NULL;
2196 }
2197#endif
2198
2199 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2200 if (self == NULL)
2201 return NULL;
2202
2203 self->entity = PyDict_New();
2204 if (!self->entity) {
2205 PyObject_Del(self);
2206 return NULL; /* FIXME: cleanup on error */
2207 }
2208
2209 self->names = PyDict_New();
2210 if (!self->names) {
2211 PyObject_Del(self);
2212 return NULL; /* FIXME: cleanup on error */
2213 }
2214
2215 memory_handler.malloc_fcn = PyObject_Malloc;
2216 memory_handler.realloc_fcn = PyObject_Realloc;
2217 memory_handler.free_fcn = PyObject_Free;
2218
2219 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2220 if (!self->parser) {
2221 PyErr_NoMemory();
2222 return NULL; /* FIXME: cleanup on error */
2223 }
2224
2225 /* setup target handlers */
2226 if (!target) {
2227 target = treebuilder_new();
2228 if (!target) {
2229 PyObject_Del(self);
2230 return NULL; /* FIXME: cleanup on error */
2231 }
2232 } else
2233 Py_INCREF(target);
2234 self->target = target;
2235
2236 self->handle_xml = PyObject_GetAttrString(target, "xml");
2237 self->handle_start = PyObject_GetAttrString(target, "start");
2238 self->handle_data = PyObject_GetAttrString(target, "data");
2239 self->handle_end = PyObject_GetAttrString(target, "end");
2240 self->handle_comment = PyObject_GetAttrString(target, "comment");
2241 self->handle_pi = PyObject_GetAttrString(target, "pi");
2242
2243 PyErr_Clear();
2244
2245 /* configure parser */
2246 EXPAT(SetUserData)(self->parser, self);
2247 EXPAT(SetElementHandler)(
2248 self->parser,
2249 (XML_StartElementHandler) expat_start_handler,
2250 (XML_EndElementHandler) expat_end_handler
2251 );
2252 EXPAT(SetDefaultHandlerExpand)(
2253 self->parser,
2254 (XML_DefaultHandler) expat_default_handler
2255 );
2256 EXPAT(SetCharacterDataHandler)(
2257 self->parser,
2258 (XML_CharacterDataHandler) expat_data_handler
2259 );
2260 if (self->handle_comment)
2261 EXPAT(SetCommentHandler)(
2262 self->parser,
2263 (XML_CommentHandler) expat_comment_handler
2264 );
2265 if (self->handle_pi)
2266 EXPAT(SetProcessingInstructionHandler)(
2267 self->parser,
2268 (XML_ProcessingInstructionHandler) expat_pi_handler
2269 );
2270#if defined(Py_USING_UNICODE)
2271 EXPAT(SetUnknownEncodingHandler)(
2272 self->parser,
2273 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2274 );
2275#endif
2276
2277 ALLOC(sizeof(XMLParserObject), "create expatparser");
2278
2279 return (PyObject*) self;
2280}
2281
/* destructor: free the expat parser, release every reference held by
   the parser object, then free the object itself */
static void
xmlparser_dealloc(XMLParserObject* self)
{
    EXPAT(ParserFree)(self->parser);

    /* handler slots are NULL when the target lacks the method */
    Py_XDECREF(self->handle_pi);
    Py_XDECREF(self->handle_comment);
    Py_XDECREF(self->handle_end);
    Py_XDECREF(self->handle_data);
    Py_XDECREF(self->handle_start);
    Py_XDECREF(self->handle_xml);

    /* released with Py_DECREF, so these must be non-NULL here */
    Py_DECREF(self->target);
    Py_DECREF(self->entity);
    Py_DECREF(self->names);

    RELEASE(sizeof(XMLParserObject), "destroy expatparser");

    PyObject_Del(self);
}
2302
2303/* -------------------------------------------------------------------- */
2304/* methods (in alphabetical order) */
2305
2306LOCAL(PyObject*)
2307expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2308{
2309 int ok;
2310
2311 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2312
2313 if (PyErr_Occurred())
2314 return NULL;
2315
2316 if (!ok) {
2317 PyErr_Format(
2318 PyExc_SyntaxError, "%s: line %d, column %d",
2319 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2320 EXPAT(GetErrorLineNumber)(self->parser),
2321 EXPAT(GetErrorColumnNumber)(self->parser)
2322 );
2323 return NULL;
2324 }
2325
2326 Py_RETURN_NONE;
2327}
2328
2329static PyObject*
2330xmlparser_close(XMLParserObject* self, PyObject* args)
2331{
2332 /* end feeding data to parser */
2333
2334 PyObject* res;
2335 if (!PyArg_ParseTuple(args, ":close"))
2336 return NULL;
2337
2338 res = expat_parse(self, "", 0, 1);
2339
2340 if (res && TreeBuilder_CheckExact(self->target)) {
2341 Py_DECREF(res);
2342 return treebuilder_done((TreeBuilderObject*) self->target);
2343 }
2344
2345 return res;
2346}
2347
2348static PyObject*
2349xmlparser_feed(XMLParserObject* self, PyObject* args)
2350{
2351 /* feed data to parser */
2352
2353 char* data;
2354 int data_len;
2355 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2356 return NULL;
2357
2358 return expat_parse(self, data, data_len, 0);
2359}
2360
2361static PyObject*
2362xmlparser_parse(XMLParserObject* self, PyObject* args)
2363{
2364 /* (internal) parse until end of input stream */
2365
2366 PyObject* reader;
2367 PyObject* buffer;
2368 PyObject* res;
2369
2370 PyObject* fileobj;
2371 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2372 return NULL;
2373
2374 reader = PyObject_GetAttrString(fileobj, "read");
2375 if (!reader)
2376 return NULL;
2377
2378 /* read from open file object */
2379 for (;;) {
2380
2381 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2382
2383 if (!buffer) {
2384 /* read failed (e.g. due to KeyboardInterrupt) */
2385 Py_DECREF(reader);
2386 return NULL;
2387 }
2388
2389 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2390 Py_DECREF(buffer);
2391 break;
2392 }
2393
2394 res = expat_parse(
2395 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2396 );
2397
2398 Py_DECREF(buffer);
2399
2400 if (!res) {
2401 Py_DECREF(reader);
2402 return NULL;
2403 }
2404 Py_DECREF(res);
2405
2406 }
2407
2408 Py_DECREF(reader);
2409
2410 res = expat_parse(self, "", 0, 1);
2411
2412 if (res && TreeBuilder_CheckExact(self->target)) {
2413 Py_DECREF(res);
2414 return treebuilder_done((TreeBuilderObject*) self->target);
2415 }
2416
2417 return res;
2418}
2419
2420static PyObject*
2421xmlparser_setevents(XMLParserObject* self, PyObject* args)
2422{
2423 /* activate element event reporting */
2424
2425 int i;
2426 TreeBuilderObject* target;
2427
2428 PyObject* events; /* event collector */
2429 PyObject* event_set = Py_None;
2430 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2431 &event_set))
2432 return NULL;
2433
2434 if (!TreeBuilder_CheckExact(self->target)) {
2435 PyErr_SetString(
2436 PyExc_TypeError,
2437 "event handling only supported for cElementTree.Treebuilder "
2438 "targets"
2439 );
2440 return NULL;
2441 }
2442
2443 target = (TreeBuilderObject*) self->target;
2444
2445 Py_INCREF(events);
2446 Py_XDECREF(target->events);
2447 target->events = events;
2448
2449 /* clear out existing events */
2450 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2451 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2452 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2453 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2454
2455 if (event_set == Py_None) {
2456 /* default is "end" only */
2457 target->end_event_obj = PyString_FromString("end");
2458 Py_RETURN_NONE;
2459 }
2460
2461 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2462 goto error;
2463
2464 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2465 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2466 char* event;
2467 if (!PyString_Check(item))
2468 goto error;
2469 event = PyString_AS_STRING(item);
2470 if (strcmp(event, "start") == 0) {
2471 Py_INCREF(item);
2472 target->start_event_obj = item;
2473 } else if (strcmp(event, "end") == 0) {
2474 Py_INCREF(item);
2475 Py_XDECREF(target->end_event_obj);
2476 target->end_event_obj = item;
2477 } else if (strcmp(event, "start-ns") == 0) {
2478 Py_INCREF(item);
2479 Py_XDECREF(target->start_ns_event_obj);
2480 target->start_ns_event_obj = item;
2481 EXPAT(SetNamespaceDeclHandler)(
2482 self->parser,
2483 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2484 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2485 );
2486 } else if (strcmp(event, "end-ns") == 0) {
2487 Py_INCREF(item);
2488 Py_XDECREF(target->end_ns_event_obj);
2489 target->end_ns_event_obj = item;
2490 EXPAT(SetNamespaceDeclHandler)(
2491 self->parser,
2492 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2493 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2494 );
2495 } else {
2496 PyErr_Format(
2497 PyExc_ValueError,
2498 "unknown event '%s'", event
2499 );
2500 return NULL;
2501 }
2502 }
2503
2504 Py_RETURN_NONE;
2505
2506 error:
2507 PyErr_SetString(
2508 PyExc_TypeError,
2509 "invalid event tuple"
2510 );
2511 return NULL;
2512}
2513
2514static PyMethodDef xmlparser_methods[] = {
2515 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2516 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2517 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2518 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2519 {NULL, NULL}
2520};
2521
2522static PyObject*
2523xmlparser_getattr(XMLParserObject* self, char* name)
2524{
2525 PyObject* res;
2526
2527 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2528 if (res)
2529 return res;
2530
2531 PyErr_Clear();
2532
2533 if (strcmp(name, "entity") == 0)
2534 res = self->entity;
2535 else if (strcmp(name, "target") == 0)
2536 res = self->target;
2537 else if (strcmp(name, "version") == 0) {
2538 char buffer[100];
2539 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2540 XML_MINOR_VERSION, XML_MICRO_VERSION);
2541 return PyString_FromString(buffer);
2542 } else {
2543 PyErr_SetString(PyExc_AttributeError, name);
2544 return NULL;
2545 }
2546
2547 Py_INCREF(res);
2548 return res;
2549}
2550
2551statichere PyTypeObject XMLParser_Type = {
2552 PyObject_HEAD_INIT(NULL)
2553 0, "XMLParser", sizeof(XMLParserObject), 0,
2554 /* methods */
2555 (destructor)xmlparser_dealloc, /* tp_dealloc */
2556 0, /* tp_print */
2557 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2558};
2559
2560#endif
2561
2562/* ==================================================================== */
2563/* python module interface */
2564
2565static PyMethodDef _functions[] = {
2566 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2567 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2568 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2569#if defined(USE_EXPAT)
2570 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2571 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2572#endif
2573 {NULL, NULL}
2574};
2575
2576DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002577init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002578{
2579 PyObject* m;
2580 PyObject* g;
2581 char* bootstrap;
2582#if defined(USE_PYEXPAT_CAPI)
2583 struct PyExpat_CAPI* capi;
2584#endif
2585
2586 /* Patch object type */
2587 Element_Type.ob_type = TreeBuilder_Type.ob_type = &PyType_Type;
2588#if defined(USE_EXPAT)
2589 XMLParser_Type.ob_type = &PyType_Type;
2590#endif
2591
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002592 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002593
2594 /* python glue code */
2595
2596 g = PyDict_New();
2597
2598 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2599
2600 bootstrap = (
2601
2602#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2603 "from __future__ import generators\n" /* enable yield under 2.2 */
2604#endif
2605
2606 "from copy import copy, deepcopy\n"
2607
2608 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002609 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610 "except ImportError:\n"
2611 " import ElementTree\n"
2612 "ET = ElementTree\n"
2613 "del ElementTree\n"
2614
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002615 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002616
2617 "try:\n" /* check if copy works as is */
2618 " copy(cElementTree.Element('x'))\n"
2619 "except:\n"
2620 " def copyelement(elem):\n"
2621 " return elem\n"
2622
2623 "def Comment(text=None):\n" /* public */
2624 " element = cElementTree.Element(ET.Comment)\n"
2625 " element.text = text\n"
2626 " return element\n"
2627 "cElementTree.Comment = Comment\n"
2628
2629 "class ElementTree(ET.ElementTree):\n" /* public */
2630 " def parse(self, source, parser=None):\n"
2631 " if not hasattr(source, 'read'):\n"
2632 " source = open(source, 'rb')\n"
2633 " if parser is not None:\n"
2634 " while 1:\n"
2635 " data = source.read(65536)\n"
2636 " if not data:\n"
2637 " break\n"
2638 " parser.feed(data)\n"
2639 " self._root = parser.close()\n"
2640 " else:\n"
2641 " parser = cElementTree.XMLParser()\n"
2642 " self._root = parser._parse(source)\n"
2643 " return self._root\n"
2644 "cElementTree.ElementTree = ElementTree\n"
2645
2646 "def getiterator(node, tag=None):\n" /* helper */
2647 " if tag == '*':\n"
2648 " tag = None\n"
2649#if (PY_VERSION_HEX < 0x02020000)
2650 " nodes = []\n" /* 2.1 doesn't have yield */
2651 " if tag is None or node.tag == tag:\n"
2652 " nodes.append(node)\n"
2653 " for node in node:\n"
2654 " nodes.extend(getiterator(node, tag))\n"
2655 " return nodes\n"
2656#else
2657 " if tag is None or node.tag == tag:\n"
2658 " yield node\n"
2659 " for node in node:\n"
2660 " for node in getiterator(node, tag):\n"
2661 " yield node\n"
2662#endif
2663
2664 "def parse(source, parser=None):\n" /* public */
2665 " tree = ElementTree()\n"
2666 " tree.parse(source, parser)\n"
2667 " return tree\n"
2668 "cElementTree.parse = parse\n"
2669
2670#if (PY_VERSION_HEX < 0x02020000)
2671 "if hasattr(ET, 'iterparse'):\n"
2672 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2673#else
2674 "class iterparse(object):\n"
2675 " root = None\n"
2676 " def __init__(self, file, events=None):\n"
2677 " if not hasattr(file, 'read'):\n"
2678 " file = open(file, 'rb')\n"
2679 " self._file = file\n"
2680 " self._events = events\n"
2681 " def __iter__(self):\n"
2682 " events = []\n"
2683 " b = cElementTree.TreeBuilder()\n"
2684 " p = cElementTree.XMLParser(b)\n"
2685 " p._setevents(events, self._events)\n"
2686 " while 1:\n"
2687 " data = self._file.read(16384)\n"
2688 " if not data:\n"
2689 " break\n"
2690 " p.feed(data)\n"
2691 " for event in events:\n"
2692 " yield event\n"
2693 " del events[:]\n"
2694 " root = p.close()\n"
2695 " for event in events:\n"
2696 " yield event\n"
2697 " self.root = root\n"
2698 "cElementTree.iterparse = iterparse\n"
2699#endif
2700
2701 "def PI(target, text=None):\n" /* public */
2702 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2703 " element.text = target\n"
2704 " if text:\n"
2705 " element.text = element.text + ' ' + text\n"
2706 " return element\n"
2707
2708 " elem = cElementTree.Element(ET.PI)\n"
2709 " elem.text = text\n"
2710 " return elem\n"
2711 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2712
2713 "def XML(text):\n" /* public */
2714 " parser = cElementTree.XMLParser()\n"
2715 " parser.feed(text)\n"
2716 " return parser.close()\n"
2717 "cElementTree.XML = cElementTree.fromstring = XML\n"
2718
2719 "def XMLID(text):\n" /* public */
2720 " tree = XML(text)\n"
2721 " ids = {}\n"
2722 " for elem in tree.getiterator():\n"
2723 " id = elem.get('id')\n"
2724 " if id:\n"
2725 " ids[id] = elem\n"
2726 " return tree, ids\n"
2727 "cElementTree.XMLID = XMLID\n"
2728
2729 "cElementTree.dump = ET.dump\n"
2730 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2731 "cElementTree.iselement = ET.iselement\n"
2732 "cElementTree.QName = ET.QName\n"
2733 "cElementTree.tostring = ET.tostring\n"
2734 "cElementTree.VERSION = '" VERSION "'\n"
2735 "cElementTree.__version__ = '" VERSION "'\n"
2736 "cElementTree.XMLParserError = SyntaxError\n"
2737
2738 );
2739
2740 PyRun_String(bootstrap, Py_file_input, g, NULL);
2741
2742 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2743
2744 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2745 if (elementtree_copyelement_obj) {
2746 /* reduce hack needed; enable reduce method */
2747 PyMethodDef* mp;
2748 for (mp = element_methods; mp->ml_name; mp++)
2749 if (mp->ml_meth == (PyCFunction) element_reduce) {
2750 mp->ml_name = "__reduce__";
2751 break;
2752 }
2753 } else
2754 PyErr_Clear();
2755 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2756 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2757
2758#if defined(USE_PYEXPAT_CAPI)
2759 /* link against pyexpat, if possible */
2760 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2761 if (capi &&
2762 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2763 capi->size <= sizeof(*expat_capi) &&
2764 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2765 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2766 capi->MICRO_VERSION == XML_MICRO_VERSION)
2767 expat_capi = capi;
2768 else
2769 expat_capi = NULL;
2770#endif
2771
2772}