blob: a43fe2ec33b9433e0dc565a1382559312a69c80b [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
3 * $Id: /work/modules/celementtree/cElementTree.c 1128 2005-12-16T21:57:13.668520Z Fredrik $
4 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
36 *
37 * Copyright (c) 1999-2005 by Secret Labs AB. All rights reserved.
38 * Copyright (c) 1999-2005 by Fredrik Lundh.
39 *
40 * info@pythonware.com
41 * http://www.pythonware.com
42 */
43
Fredrik Lundh6d52b552005-12-16 22:06:43 +000044/* Licensed to PSF under a Contributor Agreement. */
45/* See http://www.python.org/2.4/license for licensing details. */
46
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000047#include "Python.h"
48
49#define VERSION "1.0.5"
50
51/* -------------------------------------------------------------------- */
52/* configuration */
53
54/* Leave defined to include the expat-based XMLParser type */
55#define USE_EXPAT
56
/* Define to route all expat calls via pyexpat's embedded expat library */
58/* #define USE_PYEXPAT_CAPI */
59
60/* An element can hold this many children without extra memory
61 allocations. */
62#define STATIC_CHILDREN 4
63
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
68
69/* Also note that pymalloc always allocates blocks in multiples of
70 eight bytes. For the current version of cElementTree, this means
71 that the number of children should be an even number, at least on
72 32-bit platforms. */
73
74/* -------------------------------------------------------------------- */
75
/* memory accounting hooks. change the condition below to 1 to keep a
   running byte count and print it for every ALLOC/RELEASE; otherwise
   both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
86
/* compiler tweaks: LOCAL(type) introduces a file-private helper that
   returns 'type'. the MSVC variant also asks for inlining and the
   fastcall convention. */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
93
/* compatibility macros */

/* the exact-type checks are mapped to the plain type checks on
   interpreters older than the versions tested below */
#if PY_VERSION_HEX < 0x02040000
#define PyDict_CheckExact PyDict_Check
#endif

#if PY_VERSION_HEX < 0x02020000
#define PyList_CheckExact PyList_Check
#define PyString_CheckExact PyString_Check
#endif

#if PY_VERSION_HEX >= 0x01060000 && PY_VERSION_HEX < 0x02020000
#define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
#endif

/* 2.5 adds const to some API:s */
#if PY_VERSION_HEX < 0x02050000
#define PY_CONST
#else
#define PY_CONST const
#endif

#ifndef Py_RETURN_NONE
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
115
/* the text and tail slots are tagged pointers: the lowest bit of the
   stored address carries a 'join' flag. every use of text or tail as
   an object pointer must therefore go through JOIN_OBJ. see the
   comments in the ElementObject definition for more info.

   JOIN_OBJ(p)       strip the flag, giving the object pointer
   JOIN_GET(p)       extract the flag (0 or 1)
   JOIN_SET(p, flag) combine an object pointer with a flag value */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
123
124/* glue functions (see the init function for details) */
125static PyObject* elementtree_copyelement_obj;
126static PyObject* elementtree_deepcopy_obj;
127static PyObject* elementtree_getiterator_obj;
128static PyObject* elementpath_obj;
129
130/* helpers */
131
132LOCAL(PyObject*)
133deepcopy(PyObject* object, PyObject* memo)
134{
135 /* do a deep copy of the given object */
136
137 PyObject* args;
138 PyObject* result;
139
140 if (!elementtree_deepcopy_obj) {
141 PyErr_SetString(
142 PyExc_RuntimeError,
143 "deepcopy helper not found"
144 );
145 return NULL;
146 }
147
148 args = PyTuple_New(2);
149 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
150 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
151
152 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
153
154 Py_DECREF(args);
155
156 return result;
157}
158
159LOCAL(PyObject*)
160list_join(PyObject* list)
161{
162 /* join list elements (destroying the list in the process) */
163
164 PyObject* joiner;
165 PyObject* function;
166 PyObject* args;
167 PyObject* result;
168
169 switch (PyList_GET_SIZE(list)) {
170 case 0:
171 Py_DECREF(list);
172 return PyString_FromString("");
173 case 1:
174 result = PyList_GET_ITEM(list, 0);
175 Py_INCREF(result);
176 Py_DECREF(list);
177 return result;
178 }
179
180 /* two or more elements: slice out a suitable separator from the
181 first member, and use that to join the entire list */
182
183 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
184 if (!joiner)
185 return NULL;
186
187 function = PyObject_GetAttrString(joiner, "join");
188 if (!function) {
189 Py_DECREF(joiner);
190 return NULL;
191 }
192
193 args = PyTuple_New(1);
194 PyTuple_SET_ITEM(args, 0, list);
195
196 result = PyObject_CallObject(function, args);
197
198 Py_DECREF(args); /* also removes list */
199 Py_DECREF(function);
200 Py_DECREF(joiner);
201
202 return result;
203}
204
205#if (PY_VERSION_HEX < 0x02020000)
206LOCAL(int)
207PyDict_Update(PyObject* dict, PyObject* other)
208{
209 /* PyDict_Update emulation for 2.1 and earlier */
210
211 PyObject* res;
212
213 res = PyObject_CallMethod(dict, "update", "O", other);
214 if (!res)
215 return -1;
216
217 Py_DECREF(res);
218 return 0;
219}
220#endif
221
222/* -------------------------------------------------------------------- */
223/* the element type */
224
225typedef struct {
226
227 /* attributes (a dictionary object), or None if no attributes */
228 PyObject* attrib;
229
230 /* child elements */
231 int length; /* actual number of items */
232 int allocated; /* allocated items */
233
234 /* this either points to _children or to a malloced buffer */
235 PyObject* *children;
236
237 PyObject* _children[STATIC_CHILDREN];
238
239} ElementObjectExtra;
240
241typedef struct {
242 PyObject_HEAD
243
244 /* element tag (a string). */
245 PyObject* tag;
246
247 /* text before first child. note that this is a tagged pointer;
248 use JOIN_OBJ to get the object pointer. the join flag is used
249 to distinguish lists created by the tree builder from lists
250 assigned to the attribute by application code; the former
251 should be joined before being returned to the user, the latter
252 should be left intact. */
253 PyObject* text;
254
255 /* text after this element, in parent. note that this is a tagged
256 pointer; use JOIN_OBJ to get the object pointer. */
257 PyObject* tail;
258
259 ElementObjectExtra* extra;
260
261} ElementObject;
262
263staticforward PyTypeObject Element_Type;
264
265#define Element_CheckExact(op) ((op)->ob_type == &Element_Type)
266
267/* -------------------------------------------------------------------- */
268/* element constructor and destructor */
269
270LOCAL(int)
271element_new_extra(ElementObject* self, PyObject* attrib)
272{
273 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
274 if (!self->extra)
275 return -1;
276
277 if (!attrib)
278 attrib = Py_None;
279
280 Py_INCREF(attrib);
281 self->extra->attrib = attrib;
282
283 self->extra->length = 0;
284 self->extra->allocated = STATIC_CHILDREN;
285 self->extra->children = self->extra->_children;
286
287 return 0;
288}
289
290LOCAL(void)
291element_dealloc_extra(ElementObject* self)
292{
293 int i;
294
295 Py_DECREF(self->extra->attrib);
296
297 for (i = 0; i < self->extra->length; i++)
298 Py_DECREF(self->extra->children[i]);
299
300 if (self->extra->children != self->extra->_children)
301 PyObject_Free(self->extra->children);
302
303 PyObject_Free(self->extra);
304}
305
306LOCAL(PyObject*)
307element_new(PyObject* tag, PyObject* attrib)
308{
309 ElementObject* self;
310
311 self = PyObject_New(ElementObject, &Element_Type);
312 if (self == NULL)
313 return NULL;
314
315 /* use None for empty dictionaries */
316 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
317 attrib = Py_None;
318
319 self->extra = NULL;
320
321 if (attrib != Py_None) {
322
323 if (element_new_extra(self, attrib) < 0)
324 return NULL;
325
326 self->extra->length = 0;
327 self->extra->allocated = STATIC_CHILDREN;
328 self->extra->children = self->extra->_children;
329
330 }
331
332 Py_INCREF(tag);
333 self->tag = tag;
334
335 Py_INCREF(Py_None);
336 self->text = Py_None;
337
338 Py_INCREF(Py_None);
339 self->tail = Py_None;
340
341 ALLOC(sizeof(ElementObject), "create element");
342
343 return (PyObject*) self;
344}
345
346LOCAL(int)
347element_resize(ElementObject* self, int extra)
348{
349 int size;
350 PyObject* *children;
351
352 /* make sure self->children can hold the given number of extra
353 elements. set an exception and return -1 if allocation failed */
354
355 if (!self->extra)
356 element_new_extra(self, NULL);
357
358 size = self->extra->length + extra;
359
360 if (size > self->extra->allocated) {
361 /* use Python 2.4's list growth strategy */
362 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
363 if (self->extra->children != self->extra->_children) {
364 children = PyObject_Realloc(self->extra->children,
365 size * sizeof(PyObject*));
366 if (!children)
367 goto nomemory;
368 } else {
369 children = PyObject_Malloc(size * sizeof(PyObject*));
370 if (!children)
371 goto nomemory;
372 /* copy existing children from static area to malloc buffer */
373 memcpy(children, self->extra->children,
374 self->extra->length * sizeof(PyObject*));
375 }
376 self->extra->children = children;
377 self->extra->allocated = size;
378 }
379
380 return 0;
381
382 nomemory:
383 PyErr_NoMemory();
384 return -1;
385}
386
387LOCAL(int)
388element_add_subelement(ElementObject* self, PyObject* element)
389{
390 /* add a child element to a parent */
391
392 if (element_resize(self, 1) < 0)
393 return -1;
394
395 Py_INCREF(element);
396 self->extra->children[self->extra->length] = element;
397
398 self->extra->length++;
399
400 return 0;
401}
402
403LOCAL(PyObject*)
404element_get_attrib(ElementObject* self)
405{
406 /* return borrowed reference to attrib dictionary */
407 /* note: this function assumes that the extra section exists */
408
409 PyObject* res = self->extra->attrib;
410
411 if (res == Py_None) {
412 /* create missing dictionary */
413 res = PyDict_New();
414 if (!res)
415 return NULL;
416 self->extra->attrib = res;
417 }
418
419 return res;
420}
421
422LOCAL(PyObject*)
423element_get_text(ElementObject* self)
424{
425 /* return borrowed reference to text attribute */
426
427 PyObject* res = self->text;
428
429 if (JOIN_GET(res)) {
430 res = JOIN_OBJ(res);
431 if (PyList_CheckExact(res)) {
432 res = list_join(res);
433 if (!res)
434 return NULL;
435 self->text = res;
436 }
437 }
438
439 return res;
440}
441
442LOCAL(PyObject*)
443element_get_tail(ElementObject* self)
444{
445 /* return borrowed reference to text attribute */
446
447 PyObject* res = self->tail;
448
449 if (JOIN_GET(res)) {
450 res = JOIN_OBJ(res);
451 if (PyList_CheckExact(res)) {
452 res = list_join(res);
453 if (!res)
454 return NULL;
455 self->tail = res;
456 }
457 }
458
459 return res;
460}
461
462static PyObject*
463element(PyObject* self, PyObject* args, PyObject* kw)
464{
465 PyObject* elem;
466
467 PyObject* tag;
468 PyObject* attrib = NULL;
469 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
470 &PyDict_Type, &attrib))
471 return NULL;
472
473 if (attrib || kw) {
474 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
475 if (!attrib)
476 return NULL;
477 if (kw)
478 PyDict_Update(attrib, kw);
479 } else {
480 Py_INCREF(Py_None);
481 attrib = Py_None;
482 }
483
484 elem = element_new(tag, attrib);
485
486 Py_DECREF(attrib);
487
488 return elem;
489}
490
491static PyObject*
492subelement(PyObject* self, PyObject* args, PyObject* kw)
493{
494 PyObject* elem;
495
496 ElementObject* parent;
497 PyObject* tag;
498 PyObject* attrib = NULL;
499 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
500 &Element_Type, &parent, &tag,
501 &PyDict_Type, &attrib))
502 return NULL;
503
504 if (attrib || kw) {
505 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
506 if (!attrib)
507 return NULL;
508 if (kw)
509 PyDict_Update(attrib, kw);
510 } else {
511 Py_INCREF(Py_None);
512 attrib = Py_None;
513 }
514
515 elem = element_new(tag, attrib);
516
517 Py_DECREF(attrib);
518
519 if (element_add_subelement(parent, elem) < 0)
520 return NULL;
521
522 return elem;
523}
524
525static void
526element_dealloc(ElementObject* self)
527{
528 if (self->extra)
529 element_dealloc_extra(self);
530
531 /* discard attributes */
532 Py_DECREF(self->tag);
533 Py_DECREF(JOIN_OBJ(self->text));
534 Py_DECREF(JOIN_OBJ(self->tail));
535
536 RELEASE(sizeof(ElementObject), "destroy element");
537
538 PyObject_Del(self);
539}
540
541/* -------------------------------------------------------------------- */
542/* methods (in alphabetical order) */
543
544static PyObject*
545element_append(ElementObject* self, PyObject* args)
546{
547 PyObject* element;
548 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
549 return NULL;
550
551 if (element_add_subelement(self, element) < 0)
552 return NULL;
553
554 Py_RETURN_NONE;
555}
556
557static PyObject*
558element_clear(ElementObject* self, PyObject* args)
559{
560 if (!PyArg_ParseTuple(args, ":clear"))
561 return NULL;
562
563 if (self->extra) {
564 element_dealloc_extra(self);
565 self->extra = NULL;
566 }
567
568 Py_INCREF(Py_None);
569 Py_DECREF(JOIN_OBJ(self->text));
570 self->text = Py_None;
571
572 Py_INCREF(Py_None);
573 Py_DECREF(JOIN_OBJ(self->tail));
574 self->tail = Py_None;
575
576 Py_RETURN_NONE;
577}
578
579static PyObject*
580element_copy(ElementObject* self, PyObject* args)
581{
582 int i;
583 ElementObject* element;
584
585 if (!PyArg_ParseTuple(args, ":__copy__"))
586 return NULL;
587
588 element = (ElementObject*) element_new(
589 self->tag, (self->extra) ? self->extra->attrib : Py_None
590 );
591 if (!element)
592 return NULL;
593
594 Py_DECREF(JOIN_OBJ(element->text));
595 element->text = self->text;
596 Py_INCREF(JOIN_OBJ(element->text));
597
598 Py_DECREF(JOIN_OBJ(element->tail));
599 element->tail = self->tail;
600 Py_INCREF(JOIN_OBJ(element->tail));
601
602 if (self->extra) {
603
604 if (element_resize(element, self->extra->length) < 0)
605 return NULL;
606
607 for (i = 0; i < self->extra->length; i++) {
608 Py_INCREF(self->extra->children[i]);
609 element->extra->children[i] = self->extra->children[i];
610 }
611
612 element->extra->length = self->extra->length;
613
614 }
615
616 return (PyObject*) element;
617}
618
619static PyObject*
620element_deepcopy(ElementObject* self, PyObject* args)
621{
622 int i;
623 ElementObject* element;
624 PyObject* tag;
625 PyObject* attrib;
626 PyObject* text;
627 PyObject* tail;
628 PyObject* id;
629
630 PyObject* memo;
631 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
632 return NULL;
633
634 tag = deepcopy(self->tag, memo);
635 if (!tag)
636 return NULL;
637
638 if (self->extra) {
639 attrib = deepcopy(self->extra->attrib, memo);
640 if (!attrib) {
641 Py_DECREF(tag);
642 return NULL;
643 }
644 } else {
645 Py_INCREF(Py_None);
646 attrib = Py_None;
647 }
648
649 element = (ElementObject*) element_new(tag, attrib);
650
651 Py_DECREF(tag);
652 Py_DECREF(attrib);
653
654 if (!element)
655 return NULL;
656
657 text = deepcopy(JOIN_OBJ(self->text), memo);
658 if (!text)
659 goto error;
660 Py_DECREF(element->text);
661 element->text = JOIN_SET(text, JOIN_GET(self->text));
662
663 tail = deepcopy(JOIN_OBJ(self->tail), memo);
664 if (!tail)
665 goto error;
666 Py_DECREF(element->tail);
667 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
668
669 if (self->extra) {
670
671 if (element_resize(element, self->extra->length) < 0)
672 goto error;
673
674 for (i = 0; i < self->extra->length; i++) {
675 PyObject* child = deepcopy(self->extra->children[i], memo);
676 if (!child) {
677 element->extra->length = i;
678 goto error;
679 }
680 element->extra->children[i] = child;
681 }
682
683 element->extra->length = self->extra->length;
684
685 }
686
687 /* add object to memo dictionary (so deepcopy won't visit it again) */
688 id = PyInt_FromLong((Py_uintptr_t) self);
689
690 i = PyDict_SetItem(memo, id, (PyObject*) element);
691
692 Py_DECREF(id);
693
694 if (i < 0)
695 goto error;
696
697 return (PyObject*) element;
698
699 error:
700 Py_DECREF(element);
701 return NULL;
702}
703
704LOCAL(int)
705checkpath(PyObject* tag)
706{
707 int i, check = 1;
708
709 /* check if a tag contains an xpath character */
710
711#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
712
713#if defined(Py_USING_UNICODE)
714 if (PyUnicode_Check(tag)) {
715 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
716 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
717 if (p[i] == '{')
718 check = 0;
719 else if (p[i] == '}')
720 check = 1;
721 else if (check && PATHCHAR(p[i]))
722 return 1;
723 }
724 return 0;
725 }
726#endif
727 if (PyString_Check(tag)) {
728 char *p = PyString_AS_STRING(tag);
729 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
730 if (p[i] == '{')
731 check = 0;
732 else if (p[i] == '}')
733 check = 1;
734 else if (check && PATHCHAR(p[i]))
735 return 1;
736 }
737 return 0;
738 }
739
740 return 1; /* unknown type; might be path expression */
741}
742
743static PyObject*
744element_find(ElementObject* self, PyObject* args)
745{
746 int i;
747
748 PyObject* tag;
749 if (!PyArg_ParseTuple(args, "O:find", &tag))
750 return NULL;
751
752 if (checkpath(tag))
753 return PyObject_CallMethod(
754 elementpath_obj, "find", "OO", self, tag
755 );
756
757 if (!self->extra)
758 Py_RETURN_NONE;
759
760 for (i = 0; i < self->extra->length; i++) {
761 PyObject* item = self->extra->children[i];
762 if (Element_CheckExact(item) &&
763 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
764 Py_INCREF(item);
765 return item;
766 }
767 }
768
769 Py_RETURN_NONE;
770}
771
772static PyObject*
773element_findtext(ElementObject* self, PyObject* args)
774{
775 int i;
776
777 PyObject* tag;
778 PyObject* default_value = Py_None;
779 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
780 return NULL;
781
782 if (checkpath(tag))
783 return PyObject_CallMethod(
784 elementpath_obj, "findtext", "OOO", self, tag, default_value
785 );
786
787 if (!self->extra) {
788 Py_INCREF(default_value);
789 return default_value;
790 }
791
792 for (i = 0; i < self->extra->length; i++) {
793 ElementObject* item = (ElementObject*) self->extra->children[i];
794 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
795 PyObject* text = element_get_text(item);
796 if (text == Py_None)
797 return PyString_FromString("");
798 Py_INCREF(text);
799 return text;
800 }
801 }
802
803 Py_INCREF(default_value);
804 return default_value;
805}
806
807static PyObject*
808element_findall(ElementObject* self, PyObject* args)
809{
810 int i;
811 PyObject* out;
812
813 PyObject* tag;
814 if (!PyArg_ParseTuple(args, "O:findall", &tag))
815 return NULL;
816
817 if (checkpath(tag))
818 return PyObject_CallMethod(
819 elementpath_obj, "findall", "OO", self, tag
820 );
821
822 out = PyList_New(0);
823 if (!out)
824 return NULL;
825
826 if (!self->extra)
827 return out;
828
829 for (i = 0; i < self->extra->length; i++) {
830 PyObject* item = self->extra->children[i];
831 if (Element_CheckExact(item) &&
832 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
833 if (PyList_Append(out, item) < 0) {
834 Py_DECREF(out);
835 return NULL;
836 }
837 }
838 }
839
840 return out;
841}
842
843static PyObject*
844element_get(ElementObject* self, PyObject* args)
845{
846 PyObject* value;
847
848 PyObject* key;
849 PyObject* default_value = Py_None;
850 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
851 return NULL;
852
853 if (!self->extra || self->extra->attrib == Py_None)
854 value = default_value;
855 else {
856 value = PyDict_GetItem(self->extra->attrib, key);
857 if (!value)
858 value = default_value;
859 }
860
861 Py_INCREF(value);
862 return value;
863}
864
865static PyObject*
866element_getchildren(ElementObject* self, PyObject* args)
867{
868 int i;
869 PyObject* list;
870
871 if (!PyArg_ParseTuple(args, ":getchildren"))
872 return NULL;
873
874 if (!self->extra)
875 return PyList_New(0);
876
877 list = PyList_New(self->extra->length);
878 if (!list)
879 return NULL;
880
881 for (i = 0; i < self->extra->length; i++) {
882 PyObject* item = self->extra->children[i];
883 Py_INCREF(item);
884 PyList_SET_ITEM(list, i, item);
885 }
886
887 return list;
888}
889
890static PyObject*
891element_getiterator(ElementObject* self, PyObject* args)
892{
893 PyObject* result;
894
895 PyObject* tag = Py_None;
896 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
897 return NULL;
898
899 if (!elementtree_getiterator_obj) {
900 PyErr_SetString(
901 PyExc_RuntimeError,
902 "getiterator helper not found"
903 );
904 return NULL;
905 }
906
907 args = PyTuple_New(2);
Neal Norwitz02876df2006-02-07 06:58:52 +0000908 if (args == NULL)
909 return NULL;
910
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000911 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
912 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
913
914 result = PyObject_CallObject(elementtree_getiterator_obj, args);
915
916 Py_DECREF(args);
917
918 return result;
919}
920
921static PyObject*
922element_getitem(ElementObject* self, int index)
923{
924 if (!self->extra || index < 0 || index >= self->extra->length) {
925 PyErr_SetString(
926 PyExc_IndexError,
927 "child index out of range"
928 );
929 return NULL;
930 }
931
932 Py_INCREF(self->extra->children[index]);
933 return self->extra->children[index];
934}
935
936static PyObject*
937element_getslice(ElementObject* self, int start, int end)
938{
939 int i;
940 PyObject* list;
941
942 if (!self->extra)
943 return PyList_New(0);
944
945 /* standard clamping */
946 if (start < 0)
947 start = 0;
948 if (end < 0)
949 end = 0;
950 if (end > self->extra->length)
951 end = self->extra->length;
952 if (start > end)
953 start = end;
954
955 list = PyList_New(end - start);
956 if (!list)
957 return NULL;
958
959 for (i = start; i < end; i++) {
960 PyObject* item = self->extra->children[i];
961 Py_INCREF(item);
962 PyList_SET_ITEM(list, i - start, item);
963 }
964
965 return list;
966}
967
968static PyObject*
969element_insert(ElementObject* self, PyObject* args)
970{
971 int i;
972
973 int index;
974 PyObject* element;
975 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
976 &Element_Type, &element))
977 return NULL;
978
979 if (!self->extra)
980 element_new_extra(self, NULL);
981
982 if (index < 0)
983 index = 0;
984 if (index > self->extra->length)
985 index = self->extra->length;
986
987 if (element_resize(self, 1) < 0)
988 return NULL;
989
990 for (i = self->extra->length; i > index; i--)
991 self->extra->children[i] = self->extra->children[i-1];
992
993 Py_INCREF(element);
994 self->extra->children[index] = element;
995
996 self->extra->length++;
997
998 Py_RETURN_NONE;
999}
1000
1001static PyObject*
1002element_items(ElementObject* self, PyObject* args)
1003{
1004 if (!PyArg_ParseTuple(args, ":items"))
1005 return NULL;
1006
1007 if (!self->extra || self->extra->attrib == Py_None)
1008 return PyList_New(0);
1009
1010 return PyDict_Items(self->extra->attrib);
1011}
1012
1013static PyObject*
1014element_keys(ElementObject* self, PyObject* args)
1015{
1016 if (!PyArg_ParseTuple(args, ":keys"))
1017 return NULL;
1018
1019 if (!self->extra || self->extra->attrib == Py_None)
1020 return PyList_New(0);
1021
1022 return PyDict_Keys(self->extra->attrib);
1023}
1024
1025static int
1026element_length(ElementObject* self)
1027{
1028 if (!self->extra)
1029 return 0;
1030
1031 return self->extra->length;
1032}
1033
1034static PyObject*
1035element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1036{
1037 PyObject* elem;
1038
1039 PyObject* tag;
1040 PyObject* attrib;
1041 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1042 return NULL;
1043
1044 attrib = PyDict_Copy(attrib);
1045 if (!attrib)
1046 return NULL;
1047
1048 elem = element_new(tag, attrib);
1049
1050 Py_DECREF(attrib);
1051
1052 return elem;
1053}
1054
1055static PyObject*
1056element_reduce(ElementObject* self, PyObject* args)
1057{
1058 if (!PyArg_ParseTuple(args, ":__reduce__"))
1059 return NULL;
1060
1061 /* Hack alert: This method is used to work around a __copy__
1062 problem on certain 2.3 and 2.4 versions. To save time and
1063 simplify the code, we create the copy in here, and use a dummy
1064 copyelement helper to trick the copy module into doing the
1065 right thing. */
1066
1067 if (!elementtree_copyelement_obj) {
1068 PyErr_SetString(
1069 PyExc_RuntimeError,
1070 "copyelement helper not found"
1071 );
1072 return NULL;
1073 }
1074
1075 return Py_BuildValue(
1076 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1077 );
1078}
1079
1080static PyObject*
1081element_remove(ElementObject* self, PyObject* args)
1082{
1083 int i;
1084
1085 PyObject* element;
1086 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1087 return NULL;
1088
1089 if (!self->extra) {
1090 /* element has no children, so raise exception */
1091 PyErr_SetString(
1092 PyExc_ValueError,
1093 "list.remove(x): x not in list"
1094 );
1095 return NULL;
1096 }
1097
1098 for (i = 0; i < self->extra->length; i++) {
1099 if (self->extra->children[i] == element)
1100 break;
1101 if (PyObject_Compare(self->extra->children[i], element) == 0)
1102 break;
1103 }
1104
1105 if (i == self->extra->length) {
1106 /* element is not in children, so raise exception */
1107 PyErr_SetString(
1108 PyExc_ValueError,
1109 "list.remove(x): x not in list"
1110 );
1111 return NULL;
1112 }
1113
1114 Py_DECREF(self->extra->children[i]);
1115
1116 self->extra->length--;
1117
1118 for (; i < self->extra->length; i++)
1119 self->extra->children[i] = self->extra->children[i+1];
1120
1121 Py_RETURN_NONE;
1122}
1123
1124static PyObject*
1125element_repr(ElementObject* self)
1126{
1127 PyObject* repr;
1128 char buffer[100];
1129
1130 repr = PyString_FromString("<Element ");
1131
1132 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag));
1133
1134 sprintf(buffer, " at %p>", self);
1135 PyString_ConcatAndDel(&repr, PyString_FromString(buffer));
1136
1137 return repr;
1138}
1139
1140static PyObject*
1141element_set(ElementObject* self, PyObject* args)
1142{
1143 PyObject* attrib;
1144
1145 PyObject* key;
1146 PyObject* value;
1147 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1148 return NULL;
1149
1150 if (!self->extra)
1151 element_new_extra(self, NULL);
1152
1153 attrib = element_get_attrib(self);
1154 if (!attrib)
1155 return NULL;
1156
1157 if (PyDict_SetItem(attrib, key, value) < 0)
1158 return NULL;
1159
1160 Py_RETURN_NONE;
1161}
1162
1163static int
1164element_setslice(ElementObject* self, int start, int end, PyObject* item)
1165{
1166 int i, new, old;
1167 PyObject* recycle = NULL;
1168
1169 if (!self->extra)
1170 element_new_extra(self, NULL);
1171
1172 /* standard clamping */
1173 if (start < 0)
1174 start = 0;
1175 if (end < 0)
1176 end = 0;
1177 if (end > self->extra->length)
1178 end = self->extra->length;
1179 if (start > end)
1180 start = end;
1181
1182 old = end - start;
1183
1184 if (item == NULL)
1185 new = 0;
1186 else if (PyList_CheckExact(item)) {
1187 new = PyList_GET_SIZE(item);
1188 } else {
1189 /* FIXME: support arbitrary sequences? */
1190 PyErr_Format(
1191 PyExc_TypeError,
1192 "expected list, not \"%.200s\"", item->ob_type->tp_name
1193 );
1194 return -1;
1195 }
1196
1197 if (old > 0) {
1198 /* to avoid recursive calls to this method (via decref), move
1199 old items to the recycle bin here, and get rid of them when
1200 we're done modifying the element */
1201 recycle = PyList_New(old);
1202 for (i = 0; i < old; i++)
1203 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1204 }
1205
1206 if (new < old) {
1207 /* delete slice */
1208 for (i = end; i < self->extra->length; i++)
1209 self->extra->children[i + new - old] = self->extra->children[i];
1210 } else if (new > old) {
1211 /* insert slice */
1212 if (element_resize(self, new - old) < 0)
1213 return -1;
1214 for (i = self->extra->length-1; i >= end; i--)
1215 self->extra->children[i + new - old] = self->extra->children[i];
1216 }
1217
1218 /* replace the slice */
1219 for (i = 0; i < new; i++) {
1220 PyObject* element = PyList_GET_ITEM(item, i);
1221 Py_INCREF(element);
1222 self->extra->children[i + start] = element;
1223 }
1224
1225 self->extra->length += new - old;
1226
1227 /* discard the recycle bin, and everything in it */
1228 Py_XDECREF(recycle);
1229
1230 return 0;
1231}
1232
1233static int
1234element_setitem(ElementObject* self, int index, PyObject* item)
1235{
1236 int i;
1237 PyObject* old;
1238
1239 if (!self->extra || index < 0 || index >= self->extra->length) {
1240 PyErr_SetString(
1241 PyExc_IndexError,
1242 "child assignment index out of range");
1243 return -1;
1244 }
1245
1246 old = self->extra->children[index];
1247
1248 if (item) {
1249 Py_INCREF(item);
1250 self->extra->children[index] = item;
1251 } else {
1252 self->extra->length--;
1253 for (i = index; i < self->extra->length; i++)
1254 self->extra->children[i] = self->extra->children[i+1];
1255 }
1256
1257 Py_DECREF(old);
1258
1259 return 0;
1260}
1261
/* Method table for Element objects.  element_getattr searches it by
   name through Py_FindMethod before falling back to the data
   attributes.  Every entry uses the METH_VARARGS calling convention. */
static PyMethodDef element_methods[] = {

    {"clear", (PyCFunction) element_clear, METH_VARARGS},

    {"get", (PyCFunction) element_get, METH_VARARGS},
    {"set", (PyCFunction) element_set, METH_VARARGS},

    {"find", (PyCFunction) element_find, METH_VARARGS},
    {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
    {"findall", (PyCFunction) element_findall, METH_VARARGS},

    {"append", (PyCFunction) element_append, METH_VARARGS},
    {"insert", (PyCFunction) element_insert, METH_VARARGS},
    {"remove", (PyCFunction) element_remove, METH_VARARGS},

    {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
    {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},

    {"items", (PyCFunction) element_items, METH_VARARGS},
    {"keys", (PyCFunction) element_keys, METH_VARARGS},

    {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},

    {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
    {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},

    /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
       C objects correctly, so we have to fake it using a __reduce__-
       based hack (see the element_reduce implementation above for
       details). */

    /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
       using a runtime test to figure out if we need to fake things
       or not (see the init code below).  The following entry is
       enabled only if the hack is needed. */

    /* NOTE(review): the leading '!' presumably keeps this entry from
       matching a lookup of "__reduce__" until the init code enables
       it -- confirm against the module init code. */
    {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},

    {NULL, NULL} /* sentinel */
};
1302
1303static PyObject*
1304element_getattr(ElementObject* self, char* name)
1305{
1306 PyObject* res;
1307
1308 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1309 if (res)
1310 return res;
1311
1312 PyErr_Clear();
1313
1314 if (strcmp(name, "tag") == 0)
1315 res = self->tag;
1316 else if (strcmp(name, "text") == 0)
1317 res = element_get_text(self);
1318 else if (strcmp(name, "tail") == 0) {
1319 res = element_get_tail(self);
1320 } else if (strcmp(name, "attrib") == 0) {
1321 if (!self->extra)
1322 element_new_extra(self, NULL);
1323 res = element_get_attrib(self);
1324 } else {
1325 PyErr_SetString(PyExc_AttributeError, name);
1326 return NULL;
1327 }
1328
1329 if (!res)
1330 return NULL;
1331
1332 Py_INCREF(res);
1333 return res;
1334}
1335
1336static int
1337element_setattr(ElementObject* self, const char* name, PyObject* value)
1338{
1339 if (value == NULL) {
1340 PyErr_SetString(
1341 PyExc_AttributeError,
1342 "can't delete element attributes"
1343 );
1344 return -1;
1345 }
1346
1347 if (strcmp(name, "tag") == 0) {
1348 Py_DECREF(self->tag);
1349 self->tag = value;
1350 Py_INCREF(self->tag);
1351 } else if (strcmp(name, "text") == 0) {
1352 Py_DECREF(JOIN_OBJ(self->text));
1353 self->text = value;
1354 Py_INCREF(self->text);
1355 } else if (strcmp(name, "tail") == 0) {
1356 Py_DECREF(JOIN_OBJ(self->tail));
1357 self->tail = value;
1358 Py_INCREF(self->tail);
1359 } else if (strcmp(name, "attrib") == 0) {
1360 if (!self->extra)
1361 element_new_extra(self, NULL);
1362 Py_DECREF(self->extra->attrib);
1363 self->extra->attrib = value;
1364 Py_INCREF(self->extra->attrib);
1365 } else {
1366 PyErr_SetString(PyExc_AttributeError, name);
1367 return -1;
1368 }
1369
1370 return 0;
1371}
1372
/* Sequence protocol for Element objects: an element behaves as a
   sequence of its children (length, indexing, slicing, and item/slice
   assignment).  Concatenation and repetition are not provided. */
static PySequenceMethods element_as_sequence = {
    (inquiry) element_length, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    (intargfunc) element_getitem, /* sq_item */
    (intintargfunc) element_getslice, /* sq_slice */
    (intobjargproc) element_setitem, /* sq_ass_item */
    (intintobjargproc) element_setslice, /* sq_ass_slice */
};
1382
/* Type object for Element.  Attribute access goes through the
   tp_getattr/tp_setattr slots (element_getattr/element_setattr), and
   child access through the sequence protocol in element_as_sequence.
   Slots after tp_as_sequence are left zero-initialized. */
statichere PyTypeObject Element_Type = {
    PyObject_HEAD_INIT(NULL)
    0, "Element", sizeof(ElementObject), 0,
    /* methods */
    (destructor)element_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)element_getattr, /* tp_getattr */
    (setattrfunc)element_setattr, /* tp_setattr */
    0, /* tp_compare */
    (reprfunc)element_repr, /* tp_repr */
    0, /* tp_as_number */
    &element_as_sequence, /* tp_as_sequence */
};
1396
1397/* ==================================================================== */
1398/* the tree builder type */
1399
/* State of a tree builder: the tree under construction, the stack of
   open elements, pending character data, and the optional event
   reporting configuration installed by the parser's _setevents. */
typedef struct {
    PyObject_HEAD

    PyObject* root; /* root node (first created node) */

    /* both start out as Py_None (see treebuilder_new) */
    ElementObject* this; /* current node */
    ElementObject* last; /* most recently created node */

    PyObject* data; /* data collector (string or list), or NULL */

    PyObject* stack; /* element stack */
    int index; /* current stack size (0=empty) */

    /* element tracing */
    PyObject* events; /* list of events, or NULL if not collecting */
    PyObject* start_event_obj; /* event objects (NULL to ignore) */
    PyObject* end_event_obj;
    PyObject* start_ns_event_obj;
    PyObject* end_ns_event_obj;

} TreeBuilderObject;
1421
1422staticforward PyTypeObject TreeBuilder_Type;
1423
1424#define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type)
1425
1426/* -------------------------------------------------------------------- */
1427/* constructor and destructor */
1428
1429LOCAL(PyObject*)
1430treebuilder_new(void)
1431{
1432 TreeBuilderObject* self;
1433
1434 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1435 if (self == NULL)
1436 return NULL;
1437
1438 self->root = NULL;
1439
1440 Py_INCREF(Py_None);
1441 self->this = (ElementObject*) Py_None;
1442
1443 Py_INCREF(Py_None);
1444 self->last = (ElementObject*) Py_None;
1445
1446 self->data = NULL;
1447
1448 self->stack = PyList_New(20);
1449 self->index = 0;
1450
1451 self->events = NULL;
1452 self->start_event_obj = self->end_event_obj = NULL;
1453 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1454
1455 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1456
1457 return (PyObject*) self;
1458}
1459
1460static PyObject*
1461treebuilder(PyObject* _self, PyObject* args)
1462{
1463 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1464 return NULL;
1465
1466 return treebuilder_new();
1467}
1468
/* tp_dealloc slot: release every object the builder references, then
   free the builder itself. */
static void
treebuilder_dealloc(TreeBuilderObject* self)
{
    /* event configuration; each of these may be NULL */
    Py_XDECREF(self->end_ns_event_obj);
    Py_XDECREF(self->start_ns_event_obj);
    Py_XDECREF(self->end_event_obj);
    Py_XDECREF(self->start_event_obj);
    Py_XDECREF(self->events);
    Py_DECREF(self->stack);
    Py_XDECREF(self->data); /* NULL when no character data is pending */
    /* last and this always hold a reference (None until an element
       has been started), so plain Py_DECREF is used */
    Py_DECREF(self->last);
    Py_DECREF(self->this);
    Py_XDECREF(self->root); /* NULL if no element was ever started */

    RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");

    PyObject_Del(self);
}
1487
1488/* -------------------------------------------------------------------- */
1489/* handlers */
1490
1491LOCAL(PyObject*)
1492treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1493 PyObject* standalone)
1494{
1495 Py_RETURN_NONE;
1496}
1497
1498LOCAL(PyObject*)
1499treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1500 PyObject* attrib)
1501{
1502 PyObject* node;
1503 PyObject* this;
1504
1505 if (self->data) {
1506 if (self->this == self->last) {
Fredrik Lundh0149e3a2005-12-18 13:58:25 +00001507 Py_DECREF(self->last->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001508 self->last->text = JOIN_SET(
1509 self->data, PyList_CheckExact(self->data)
1510 );
1511 } else {
Fredrik Lundh0149e3a2005-12-18 13:58:25 +00001512 Py_DECREF(self->last->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001513 self->last->tail = JOIN_SET(
1514 self->data, PyList_CheckExact(self->data)
1515 );
1516 }
1517 self->data = NULL;
1518 }
1519
1520 node = element_new(tag, attrib);
1521 if (!node)
1522 return NULL;
1523
1524 this = (PyObject*) self->this;
1525
1526 if (this != Py_None) {
1527 if (element_add_subelement((ElementObject*) this, node) < 0)
1528 return NULL;
1529 } else {
1530 if (self->root) {
1531 PyErr_SetString(
1532 PyExc_SyntaxError,
1533 "multiple elements on top level"
1534 );
1535 return NULL;
1536 }
1537 Py_INCREF(node);
1538 self->root = node;
1539 }
1540
1541 if (self->index < PyList_GET_SIZE(self->stack)) {
1542 if (PyList_SetItem(self->stack, self->index, this) < 0)
1543 return NULL;
1544 Py_INCREF(this);
1545 } else {
1546 if (PyList_Append(self->stack, this) < 0)
1547 return NULL;
1548 }
1549 self->index++;
1550
1551 Py_DECREF(this);
1552 Py_INCREF(node);
1553 self->this = (ElementObject*) node;
1554
1555 Py_DECREF(self->last);
1556 Py_INCREF(node);
1557 self->last = (ElementObject*) node;
1558
1559 if (self->start_event_obj) {
1560 PyObject* res;
1561 PyObject* action = self->start_event_obj;
1562 res = PyTuple_New(2);
1563 if (res) {
1564 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1565 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1566 PyList_Append(self->events, res);
1567 Py_DECREF(res);
1568 } else
1569 PyErr_Clear(); /* FIXME: propagate error */
1570 }
1571
1572 return node;
1573}
1574
1575LOCAL(PyObject*)
1576treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1577{
1578 if (!self->data) {
1579 /* store the first item as is */
1580 Py_INCREF(data); self->data = data;
1581 } else {
1582 /* more than one item; use a list to collect items */
1583 if (PyString_CheckExact(self->data) && self->data->ob_refcnt == 1 &&
1584 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1585 /* expat often generates single character data sections; handle
1586 the most common case by resizing the existing string... */
1587 int size = PyString_GET_SIZE(self->data);
1588 if (_PyString_Resize(&self->data, size + 1) < 0)
1589 return NULL;
1590 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1591 } else if (PyList_CheckExact(self->data)) {
1592 if (PyList_Append(self->data, data) < 0)
1593 return NULL;
1594 } else {
1595 PyObject* list = PyList_New(2);
1596 if (!list)
1597 return NULL;
1598 PyList_SET_ITEM(list, 0, self->data);
1599 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1600 self->data = list;
1601 }
1602 }
1603
1604 Py_RETURN_NONE;
1605}
1606
1607LOCAL(PyObject*)
1608treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1609{
1610 PyObject* item;
1611
1612 if (self->data) {
1613 if (self->this == self->last) {
Fredrik Lundh0149e3a2005-12-18 13:58:25 +00001614 Py_DECREF(self->last->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001615 self->last->text = JOIN_SET(
1616 self->data, PyList_CheckExact(self->data)
1617 );
1618 } else {
Fredrik Lundh0149e3a2005-12-18 13:58:25 +00001619 Py_DECREF(self->last->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001620 self->last->tail = JOIN_SET(
1621 self->data, PyList_CheckExact(self->data)
1622 );
1623 }
1624 self->data = NULL;
1625 }
1626
1627 if (self->index == 0) {
1628 PyErr_SetString(
1629 PyExc_IndexError,
1630 "pop from empty stack"
1631 );
1632 return NULL;
1633 }
1634
1635 self->index--;
1636
1637 item = PyList_GET_ITEM(self->stack, self->index);
1638 Py_INCREF(item);
1639
1640 Py_DECREF(self->last);
1641
1642 self->last = (ElementObject*) self->this;
1643 self->this = (ElementObject*) item;
1644
1645 if (self->end_event_obj) {
1646 PyObject* res;
1647 PyObject* action = self->end_event_obj;
1648 PyObject* node = (PyObject*) self->last;
1649 res = PyTuple_New(2);
1650 if (res) {
1651 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1652 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1653 PyList_Append(self->events, res);
1654 Py_DECREF(res);
1655 } else
1656 PyErr_Clear(); /* FIXME: propagate error */
1657 }
1658
1659 Py_INCREF(self->last);
1660 return (PyObject*) self->last;
1661}
1662
1663LOCAL(void)
1664treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1665 const char* prefix, const char *uri)
1666{
1667 PyObject* res;
1668 PyObject* action;
1669 PyObject* parcel;
1670
1671 if (!self->events)
1672 return;
1673
1674 if (start) {
1675 if (!self->start_ns_event_obj)
1676 return;
1677 action = self->start_ns_event_obj;
1678 /* FIXME: prefix and uri use utf-8 encoding! */
1679 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1680 if (!parcel)
1681 return;
1682 Py_INCREF(action);
1683 } else {
1684 if (!self->end_ns_event_obj)
1685 return;
1686 action = self->end_ns_event_obj;
1687 Py_INCREF(action);
1688 parcel = Py_None;
1689 Py_INCREF(parcel);
1690 }
1691
1692 res = PyTuple_New(2);
1693
1694 if (res) {
1695 PyTuple_SET_ITEM(res, 0, action);
1696 PyTuple_SET_ITEM(res, 1, parcel);
1697 PyList_Append(self->events, res);
1698 Py_DECREF(res);
1699 } else
1700 PyErr_Clear(); /* FIXME: propagate error */
1701}
1702
1703/* -------------------------------------------------------------------- */
1704/* methods (in alphabetical order) */
1705
1706static PyObject*
1707treebuilder_data(TreeBuilderObject* self, PyObject* args)
1708{
1709 PyObject* data;
1710 if (!PyArg_ParseTuple(args, "O:data", &data))
1711 return NULL;
1712
1713 return treebuilder_handle_data(self, data);
1714}
1715
1716static PyObject*
1717treebuilder_end(TreeBuilderObject* self, PyObject* args)
1718{
1719 PyObject* tag;
1720 if (!PyArg_ParseTuple(args, "O:end", &tag))
1721 return NULL;
1722
1723 return treebuilder_handle_end(self, tag);
1724}
1725
1726LOCAL(PyObject*)
1727treebuilder_done(TreeBuilderObject* self)
1728{
1729 PyObject* res;
1730
1731 /* FIXME: check stack size? */
1732
1733 if (self->root)
1734 res = self->root;
1735 else
1736 res = Py_None;
1737
1738 Py_INCREF(res);
1739 return res;
1740}
1741
1742static PyObject*
1743treebuilder_close(TreeBuilderObject* self, PyObject* args)
1744{
1745 if (!PyArg_ParseTuple(args, ":close"))
1746 return NULL;
1747
1748 return treebuilder_done(self);
1749}
1750
1751static PyObject*
1752treebuilder_start(TreeBuilderObject* self, PyObject* args)
1753{
1754 PyObject* tag;
1755 PyObject* attrib = Py_None;
1756 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1757 return NULL;
1758
1759 return treebuilder_handle_start(self, tag, attrib);
1760}
1761
1762static PyObject*
1763treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1764{
1765 PyObject* encoding;
1766 PyObject* standalone;
1767 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1768 return NULL;
1769
1770 return treebuilder_handle_xml(self, encoding, standalone);
1771}
1772
/* Method table for TreeBuilder objects; searched by name from
   treebuilder_getattr.  Every entry uses METH_VARARGS. */
static PyMethodDef treebuilder_methods[] = {
    {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
    {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
    {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
    {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
    {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
    {NULL, NULL} /* sentinel */
};
1781
1782static PyObject*
1783treebuilder_getattr(TreeBuilderObject* self, char* name)
1784{
1785 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1786}
1787
/* Type object for TreeBuilder.  Only deallocation and attribute lookup
   are provided; the remaining slots are left zero-initialized. */
statichere PyTypeObject TreeBuilder_Type = {
    PyObject_HEAD_INIT(NULL)
    0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
    /* methods */
    (destructor)treebuilder_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)treebuilder_getattr, /* tp_getattr */
};
1796
1797/* ==================================================================== */
1798/* the expat interface */
1799
1800#if defined(USE_EXPAT)
1801
1802#include "expat.h"
1803
1804#if defined(USE_PYEXPAT_CAPI)
1805#include "pyexpat.h"
1806static struct PyExpat_CAPI* expat_capi;
1807#define EXPAT(func) (expat_capi->func)
1808#else
1809#define EXPAT(func) (XML_##func)
1810#endif
1811
/* State of an XMLParser object: the expat parser plus the target that
   receives parse events and the bound handler methods looked up on it. */
typedef struct {
    PyObject_HEAD

    XML_Parser parser; /* underlying expat parser */

    PyObject* target; /* object receiving start/data/end/... calls */
    PyObject* entity; /* dict consulted for otherwise undefined entities */

    PyObject* names; /* dict caching raw name -> universal name */

    /* handler methods fetched from the target; each one is NULL if the
       target has no attribute of that name */
    PyObject* handle_xml;
    PyObject* handle_start;
    PyObject* handle_data;
    PyObject* handle_end;

    PyObject* handle_comment;
    PyObject* handle_pi;

} XMLParserObject;
1831
1832staticforward PyTypeObject XMLParser_Type;
1833
1834/* helpers */
1835
1836#if defined(Py_USING_UNICODE)
/* Return 1 if any of the first 'size' bytes of 'string' has its high
   bit set (i.e. the data is not plain 7-bit ASCII), 0 otherwise. */
LOCAL(int)
checkstring(const char* string, int size)
{
    const char* cursor = string;
    int remaining = size;

    /* check if an 8-bit string contains UTF-8 characters */
    while (remaining-- > 0) {
        if (*cursor++ & 0x80)
            return 1;
    }

    return 0;
}
1849#endif
1850
1851LOCAL(PyObject*)
1852makestring(const char* string, int size)
1853{
1854 /* convert a UTF-8 string to either a 7-bit ascii string or a
1855 Unicode string */
1856
1857#if defined(Py_USING_UNICODE)
1858 if (checkstring(string, size))
1859 return PyUnicode_DecodeUTF8(string, size, "strict");
1860#endif
1861
1862 return PyString_FromStringAndSize(string, size);
1863}
1864
1865LOCAL(PyObject*)
1866makeuniversal(XMLParserObject* self, const char* string)
1867{
1868 /* convert a UTF-8 tag/attribute name from the expat parser
1869 to a universal name string */
1870
1871 int size = strlen(string);
1872 PyObject* key;
1873 PyObject* value;
1874
1875 /* look the 'raw' name up in the names dictionary */
1876 key = PyString_FromStringAndSize(string, size);
1877 if (!key)
1878 return NULL;
1879
1880 value = PyDict_GetItem(self->names, key);
1881
1882 if (value) {
1883 Py_INCREF(value);
1884 } else {
1885 /* new name. convert to universal name, and decode as
1886 necessary */
1887
1888 PyObject* tag;
1889 char* p;
1890 int i;
1891
1892 /* look for namespace separator */
1893 for (i = 0; i < size; i++)
1894 if (string[i] == '}')
1895 break;
1896 if (i != size) {
1897 /* convert to universal name */
1898 tag = PyString_FromStringAndSize(NULL, size+1);
1899 p = PyString_AS_STRING(tag);
1900 p[0] = '{';
1901 memcpy(p+1, string, size);
1902 size++;
1903 } else {
1904 /* plain name; use key as tag */
1905 Py_INCREF(key);
1906 tag = key;
1907 }
1908
1909 /* decode universal name */
1910#if defined(Py_USING_UNICODE)
1911 /* inline makestring, to avoid duplicating the source string if
1912 it's not an utf-8 string */
1913 p = PyString_AS_STRING(tag);
1914 if (checkstring(p, size)) {
1915 value = PyUnicode_DecodeUTF8(p, size, "strict");
1916 Py_DECREF(tag);
1917 if (!value) {
1918 Py_DECREF(key);
1919 return NULL;
1920 }
1921 } else
1922#endif
1923 value = tag; /* use tag as is */
1924
1925 /* add to names dictionary */
1926 if (PyDict_SetItem(self->names, key, value) < 0) {
1927 Py_DECREF(key);
1928 Py_DECREF(value);
1929 return NULL;
1930 }
1931 }
1932
1933 Py_DECREF(key);
1934 return value;
1935}
1936
1937/* -------------------------------------------------------------------- */
1938/* handlers */
1939
1940static void
1941expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1942 int data_len)
1943{
1944 PyObject* key;
1945 PyObject* value;
1946 PyObject* res;
1947
1948 if (data_len < 2 || data_in[0] != '&')
1949 return;
1950
1951 key = makestring(data_in + 1, data_len - 2);
1952 if (!key)
1953 return;
1954
1955 value = PyDict_GetItem(self->entity, key);
1956
1957 if (value) {
1958 if (TreeBuilder_CheckExact(self->target))
1959 res = treebuilder_handle_data(
1960 (TreeBuilderObject*) self->target, value
1961 );
1962 else if (self->handle_data)
1963 res = PyObject_CallFunction(self->handle_data, "O", value);
1964 else
1965 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001966 Py_XDECREF(res);
1967 } else {
1968 PyErr_Format(
1969 PyExc_SyntaxError, "undefined entity &%s;: line %d, column %d",
1970 PyString_AS_STRING(key),
1971 EXPAT(GetErrorLineNumber)(self->parser),
1972 EXPAT(GetErrorColumnNumber)(self->parser)
1973 );
1974 }
1975
1976 Py_DECREF(key);
1977}
1978
1979static void
1980expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
1981 const XML_Char **attrib_in)
1982{
1983 PyObject* res;
1984 PyObject* tag;
1985 PyObject* attrib;
1986 int ok;
1987
1988 /* tag name */
1989 tag = makeuniversal(self, tag_in);
1990 if (!tag)
1991 return; /* parser will look for errors */
1992
1993 /* attributes */
1994 if (attrib_in[0]) {
1995 attrib = PyDict_New();
1996 if (!attrib)
1997 return;
1998 while (attrib_in[0] && attrib_in[1]) {
1999 PyObject* key = makeuniversal(self, attrib_in[0]);
2000 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2001 if (!key || !value) {
2002 Py_XDECREF(value);
2003 Py_XDECREF(key);
2004 Py_DECREF(attrib);
2005 return;
2006 }
2007 ok = PyDict_SetItem(attrib, key, value);
2008 Py_DECREF(value);
2009 Py_DECREF(key);
2010 if (ok < 0) {
2011 Py_DECREF(attrib);
2012 return;
2013 }
2014 attrib_in += 2;
2015 }
2016 } else {
2017 Py_INCREF(Py_None);
2018 attrib = Py_None;
2019 }
2020
2021 if (TreeBuilder_CheckExact(self->target))
2022 /* shortcut */
2023 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2024 tag, attrib);
2025 else if (self->handle_start)
2026 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2027 else
2028 res = NULL;
2029
2030 Py_DECREF(tag);
2031 Py_DECREF(attrib);
2032
2033 Py_XDECREF(res);
2034}
2035
2036static void
2037expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2038 int data_len)
2039{
2040 PyObject* data;
2041 PyObject* res;
2042
2043 data = makestring(data_in, data_len);
2044
2045 if (TreeBuilder_CheckExact(self->target))
2046 /* shortcut */
2047 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2048 else if (self->handle_data)
2049 res = PyObject_CallFunction(self->handle_data, "O", data);
2050 else
2051 res = NULL;
2052
2053 Py_DECREF(data);
2054
2055 Py_XDECREF(res);
2056}
2057
2058static void
2059expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2060{
2061 PyObject* tag;
2062 PyObject* res = NULL;
2063
2064 if (TreeBuilder_CheckExact(self->target))
2065 /* shortcut */
2066 /* the standard tree builder doesn't look at the end tag */
2067 res = treebuilder_handle_end(
2068 (TreeBuilderObject*) self->target, Py_None
2069 );
2070 else if (self->handle_end) {
2071 tag = makeuniversal(self, tag_in);
2072 if (tag) {
2073 res = PyObject_CallFunction(self->handle_end, "O", tag);
2074 Py_DECREF(tag);
2075 }
2076 }
2077
2078 Py_XDECREF(res);
2079}
2080
2081static void
2082expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2083 const XML_Char *uri)
2084{
2085 treebuilder_handle_namespace(
2086 (TreeBuilderObject*) self->target, 1, prefix, uri
2087 );
2088}
2089
2090static void
2091expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2092{
2093 treebuilder_handle_namespace(
2094 (TreeBuilderObject*) self->target, 0, NULL, NULL
2095 );
2096}
2097
2098static void
2099expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2100{
2101 PyObject* comment;
2102 PyObject* res;
2103
2104 if (self->handle_comment) {
2105 comment = makestring(comment_in, strlen(comment_in));
2106 if (comment) {
2107 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2108 Py_XDECREF(res);
2109 Py_DECREF(comment);
2110 }
2111 }
2112}
2113
2114static void
2115expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2116 const XML_Char* data_in)
2117{
2118 PyObject* target;
2119 PyObject* data;
2120 PyObject* res;
2121
2122 if (self->handle_pi) {
2123 target = makestring(target_in, strlen(target_in));
2124 data = makestring(data_in, strlen(data_in));
2125 if (target && data) {
2126 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2127 Py_XDECREF(res);
2128 Py_DECREF(data);
2129 Py_DECREF(target);
2130 } else {
2131 Py_XDECREF(data);
2132 Py_XDECREF(target);
2133 }
2134 }
2135}
2136
2137#if defined(Py_USING_UNICODE)
2138static int
2139expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2140 XML_Encoding *info)
2141{
2142 PyObject* u;
2143 Py_UNICODE* p;
2144 unsigned char s[256];
2145 int i;
2146
2147 memset(info, 0, sizeof(XML_Encoding));
2148
2149 for (i = 0; i < 256; i++)
2150 s[i] = i;
2151
Fredrik Lundhc3389992005-12-25 11:40:19 +00002152 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002153 if (!u)
2154 return XML_STATUS_ERROR;
2155
2156 if (PyUnicode_GET_SIZE(u) != 256) {
2157 Py_DECREF(u);
2158 return XML_STATUS_ERROR;
2159 }
2160
2161 p = PyUnicode_AS_UNICODE(u);
2162
2163 for (i = 0; i < 256; i++) {
2164 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2165 info->map[i] = p[i];
2166 else
2167 info->map[i] = -1;
2168 }
2169
2170 Py_DECREF(u);
2171
2172 return XML_STATUS_OK;
2173}
2174#endif
2175
2176/* -------------------------------------------------------------------- */
2177/* constructor and destructor */
2178
2179static PyObject*
2180xmlparser(PyObject* _self, PyObject* args, PyObject* kw)
2181{
2182 XMLParserObject* self;
2183 /* FIXME: does this need to be static? */
2184 static XML_Memory_Handling_Suite memory_handler;
2185
2186 PyObject* target = NULL;
2187 char* encoding = NULL;
2188 static PY_CONST char* kwlist[] = { "target", "encoding", NULL };
2189 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2190 &target, &encoding))
2191 return NULL;
2192
2193#if defined(USE_PYEXPAT_CAPI)
2194 if (!expat_capi) {
2195 PyErr_SetString(
2196 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2197 );
2198 return NULL;
2199 }
2200#endif
2201
2202 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2203 if (self == NULL)
2204 return NULL;
2205
2206 self->entity = PyDict_New();
2207 if (!self->entity) {
2208 PyObject_Del(self);
2209 return NULL; /* FIXME: cleanup on error */
2210 }
2211
2212 self->names = PyDict_New();
2213 if (!self->names) {
2214 PyObject_Del(self);
2215 return NULL; /* FIXME: cleanup on error */
2216 }
2217
2218 memory_handler.malloc_fcn = PyObject_Malloc;
2219 memory_handler.realloc_fcn = PyObject_Realloc;
2220 memory_handler.free_fcn = PyObject_Free;
2221
2222 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2223 if (!self->parser) {
2224 PyErr_NoMemory();
2225 return NULL; /* FIXME: cleanup on error */
2226 }
2227
2228 /* setup target handlers */
2229 if (!target) {
2230 target = treebuilder_new();
2231 if (!target) {
2232 PyObject_Del(self);
2233 return NULL; /* FIXME: cleanup on error */
2234 }
2235 } else
2236 Py_INCREF(target);
2237 self->target = target;
2238
2239 self->handle_xml = PyObject_GetAttrString(target, "xml");
2240 self->handle_start = PyObject_GetAttrString(target, "start");
2241 self->handle_data = PyObject_GetAttrString(target, "data");
2242 self->handle_end = PyObject_GetAttrString(target, "end");
2243 self->handle_comment = PyObject_GetAttrString(target, "comment");
2244 self->handle_pi = PyObject_GetAttrString(target, "pi");
2245
2246 PyErr_Clear();
2247
2248 /* configure parser */
2249 EXPAT(SetUserData)(self->parser, self);
2250 EXPAT(SetElementHandler)(
2251 self->parser,
2252 (XML_StartElementHandler) expat_start_handler,
2253 (XML_EndElementHandler) expat_end_handler
2254 );
2255 EXPAT(SetDefaultHandlerExpand)(
2256 self->parser,
2257 (XML_DefaultHandler) expat_default_handler
2258 );
2259 EXPAT(SetCharacterDataHandler)(
2260 self->parser,
2261 (XML_CharacterDataHandler) expat_data_handler
2262 );
2263 if (self->handle_comment)
2264 EXPAT(SetCommentHandler)(
2265 self->parser,
2266 (XML_CommentHandler) expat_comment_handler
2267 );
2268 if (self->handle_pi)
2269 EXPAT(SetProcessingInstructionHandler)(
2270 self->parser,
2271 (XML_ProcessingInstructionHandler) expat_pi_handler
2272 );
2273#if defined(Py_USING_UNICODE)
2274 EXPAT(SetUnknownEncodingHandler)(
2275 self->parser,
2276 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2277 );
2278#endif
2279
2280 ALLOC(sizeof(XMLParserObject), "create expatparser");
2281
2282 return (PyObject*) self;
2283}
2284
/* tp_dealloc slot: free the expat parser, release every object the
   parser object references, then free the object itself. */
static void
xmlparser_dealloc(XMLParserObject* self)
{
    EXPAT(ParserFree)(self->parser);

    /* handler methods are optional; any of them may be NULL */
    Py_XDECREF(self->handle_pi);
    Py_XDECREF(self->handle_comment);
    Py_XDECREF(self->handle_end);
    Py_XDECREF(self->handle_data);
    Py_XDECREF(self->handle_start);
    Py_XDECREF(self->handle_xml);

    /* these are always set on a fully constructed parser object */
    Py_DECREF(self->target);
    Py_DECREF(self->entity);
    Py_DECREF(self->names);

    RELEASE(sizeof(XMLParserObject), "destroy expatparser");

    PyObject_Del(self);
}
2305
2306/* -------------------------------------------------------------------- */
2307/* methods (in alphabetical order) */
2308
2309LOCAL(PyObject*)
2310expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2311{
2312 int ok;
2313
2314 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2315
2316 if (PyErr_Occurred())
2317 return NULL;
2318
2319 if (!ok) {
2320 PyErr_Format(
2321 PyExc_SyntaxError, "%s: line %d, column %d",
2322 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2323 EXPAT(GetErrorLineNumber)(self->parser),
2324 EXPAT(GetErrorColumnNumber)(self->parser)
2325 );
2326 return NULL;
2327 }
2328
2329 Py_RETURN_NONE;
2330}
2331
2332static PyObject*
2333xmlparser_close(XMLParserObject* self, PyObject* args)
2334{
2335 /* end feeding data to parser */
2336
2337 PyObject* res;
2338 if (!PyArg_ParseTuple(args, ":close"))
2339 return NULL;
2340
2341 res = expat_parse(self, "", 0, 1);
2342
2343 if (res && TreeBuilder_CheckExact(self->target)) {
2344 Py_DECREF(res);
2345 return treebuilder_done((TreeBuilderObject*) self->target);
2346 }
2347
2348 return res;
2349}
2350
2351static PyObject*
2352xmlparser_feed(XMLParserObject* self, PyObject* args)
2353{
2354 /* feed data to parser */
2355
2356 char* data;
2357 int data_len;
2358 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2359 return NULL;
2360
2361 return expat_parse(self, data, data_len, 0);
2362}
2363
2364static PyObject*
2365xmlparser_parse(XMLParserObject* self, PyObject* args)
2366{
2367 /* (internal) parse until end of input stream */
2368
2369 PyObject* reader;
2370 PyObject* buffer;
2371 PyObject* res;
2372
2373 PyObject* fileobj;
2374 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2375 return NULL;
2376
2377 reader = PyObject_GetAttrString(fileobj, "read");
2378 if (!reader)
2379 return NULL;
2380
2381 /* read from open file object */
2382 for (;;) {
2383
2384 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2385
2386 if (!buffer) {
2387 /* read failed (e.g. due to KeyboardInterrupt) */
2388 Py_DECREF(reader);
2389 return NULL;
2390 }
2391
2392 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2393 Py_DECREF(buffer);
2394 break;
2395 }
2396
2397 res = expat_parse(
2398 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2399 );
2400
2401 Py_DECREF(buffer);
2402
2403 if (!res) {
2404 Py_DECREF(reader);
2405 return NULL;
2406 }
2407 Py_DECREF(res);
2408
2409 }
2410
2411 Py_DECREF(reader);
2412
2413 res = expat_parse(self, "", 0, 1);
2414
2415 if (res && TreeBuilder_CheckExact(self->target)) {
2416 Py_DECREF(res);
2417 return treebuilder_done((TreeBuilderObject*) self->target);
2418 }
2419
2420 return res;
2421}
2422
2423static PyObject*
2424xmlparser_setevents(XMLParserObject* self, PyObject* args)
2425{
2426 /* activate element event reporting */
2427
2428 int i;
2429 TreeBuilderObject* target;
2430
2431 PyObject* events; /* event collector */
2432 PyObject* event_set = Py_None;
2433 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2434 &event_set))
2435 return NULL;
2436
2437 if (!TreeBuilder_CheckExact(self->target)) {
2438 PyErr_SetString(
2439 PyExc_TypeError,
2440 "event handling only supported for cElementTree.Treebuilder "
2441 "targets"
2442 );
2443 return NULL;
2444 }
2445
2446 target = (TreeBuilderObject*) self->target;
2447
2448 Py_INCREF(events);
2449 Py_XDECREF(target->events);
2450 target->events = events;
2451
2452 /* clear out existing events */
2453 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2454 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2455 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2456 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2457
2458 if (event_set == Py_None) {
2459 /* default is "end" only */
2460 target->end_event_obj = PyString_FromString("end");
2461 Py_RETURN_NONE;
2462 }
2463
2464 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2465 goto error;
2466
2467 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2468 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2469 char* event;
2470 if (!PyString_Check(item))
2471 goto error;
2472 event = PyString_AS_STRING(item);
2473 if (strcmp(event, "start") == 0) {
2474 Py_INCREF(item);
2475 target->start_event_obj = item;
2476 } else if (strcmp(event, "end") == 0) {
2477 Py_INCREF(item);
2478 Py_XDECREF(target->end_event_obj);
2479 target->end_event_obj = item;
2480 } else if (strcmp(event, "start-ns") == 0) {
2481 Py_INCREF(item);
2482 Py_XDECREF(target->start_ns_event_obj);
2483 target->start_ns_event_obj = item;
2484 EXPAT(SetNamespaceDeclHandler)(
2485 self->parser,
2486 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2487 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2488 );
2489 } else if (strcmp(event, "end-ns") == 0) {
2490 Py_INCREF(item);
2491 Py_XDECREF(target->end_ns_event_obj);
2492 target->end_ns_event_obj = item;
2493 EXPAT(SetNamespaceDeclHandler)(
2494 self->parser,
2495 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2496 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2497 );
2498 } else {
2499 PyErr_Format(
2500 PyExc_ValueError,
2501 "unknown event '%s'", event
2502 );
2503 return NULL;
2504 }
2505 }
2506
2507 Py_RETURN_NONE;
2508
2509 error:
2510 PyErr_SetString(
2511 PyExc_TypeError,
2512 "invalid event tuple"
2513 );
2514 return NULL;
2515}
2516
2517static PyMethodDef xmlparser_methods[] = {
2518 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2519 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2520 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2521 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2522 {NULL, NULL}
2523};
2524
2525static PyObject*
2526xmlparser_getattr(XMLParserObject* self, char* name)
2527{
2528 PyObject* res;
2529
2530 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2531 if (res)
2532 return res;
2533
2534 PyErr_Clear();
2535
2536 if (strcmp(name, "entity") == 0)
2537 res = self->entity;
2538 else if (strcmp(name, "target") == 0)
2539 res = self->target;
2540 else if (strcmp(name, "version") == 0) {
2541 char buffer[100];
2542 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2543 XML_MINOR_VERSION, XML_MICRO_VERSION);
2544 return PyString_FromString(buffer);
2545 } else {
2546 PyErr_SetString(PyExc_AttributeError, name);
2547 return NULL;
2548 }
2549
2550 Py_INCREF(res);
2551 return res;
2552}
2553
2554statichere PyTypeObject XMLParser_Type = {
2555 PyObject_HEAD_INIT(NULL)
2556 0, "XMLParser", sizeof(XMLParserObject), 0,
2557 /* methods */
2558 (destructor)xmlparser_dealloc, /* tp_dealloc */
2559 0, /* tp_print */
2560 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2561};
2562
2563#endif
2564
2565/* ==================================================================== */
2566/* python module interface */
2567
2568static PyMethodDef _functions[] = {
2569 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2570 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2571 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2572#if defined(USE_EXPAT)
2573 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2574 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2575#endif
2576 {NULL, NULL}
2577};
2578
2579DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002580init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002581{
2582 PyObject* m;
2583 PyObject* g;
2584 char* bootstrap;
2585#if defined(USE_PYEXPAT_CAPI)
2586 struct PyExpat_CAPI* capi;
2587#endif
2588
2589 /* Patch object type */
2590 Element_Type.ob_type = TreeBuilder_Type.ob_type = &PyType_Type;
2591#if defined(USE_EXPAT)
2592 XMLParser_Type.ob_type = &PyType_Type;
2593#endif
2594
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002595 m = Py_InitModule("_elementtree", _functions);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002596 if (m == NULL)
2597 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002598
2599 /* python glue code */
2600
2601 g = PyDict_New();
Neal Norwitz02876df2006-02-07 06:58:52 +00002602 if (g == NULL)
2603 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002604
2605 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2606
2607 bootstrap = (
2608
2609#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2610 "from __future__ import generators\n" /* enable yield under 2.2 */
2611#endif
2612
2613 "from copy import copy, deepcopy\n"
2614
2615 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002616 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002617 "except ImportError:\n"
2618 " import ElementTree\n"
2619 "ET = ElementTree\n"
2620 "del ElementTree\n"
2621
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002622 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002623
2624 "try:\n" /* check if copy works as is */
2625 " copy(cElementTree.Element('x'))\n"
2626 "except:\n"
2627 " def copyelement(elem):\n"
2628 " return elem\n"
2629
2630 "def Comment(text=None):\n" /* public */
2631 " element = cElementTree.Element(ET.Comment)\n"
2632 " element.text = text\n"
2633 " return element\n"
2634 "cElementTree.Comment = Comment\n"
2635
2636 "class ElementTree(ET.ElementTree):\n" /* public */
2637 " def parse(self, source, parser=None):\n"
2638 " if not hasattr(source, 'read'):\n"
2639 " source = open(source, 'rb')\n"
2640 " if parser is not None:\n"
2641 " while 1:\n"
2642 " data = source.read(65536)\n"
2643 " if not data:\n"
2644 " break\n"
2645 " parser.feed(data)\n"
2646 " self._root = parser.close()\n"
2647 " else:\n"
2648 " parser = cElementTree.XMLParser()\n"
2649 " self._root = parser._parse(source)\n"
2650 " return self._root\n"
2651 "cElementTree.ElementTree = ElementTree\n"
2652
2653 "def getiterator(node, tag=None):\n" /* helper */
2654 " if tag == '*':\n"
2655 " tag = None\n"
2656#if (PY_VERSION_HEX < 0x02020000)
2657 " nodes = []\n" /* 2.1 doesn't have yield */
2658 " if tag is None or node.tag == tag:\n"
2659 " nodes.append(node)\n"
2660 " for node in node:\n"
2661 " nodes.extend(getiterator(node, tag))\n"
2662 " return nodes\n"
2663#else
2664 " if tag is None or node.tag == tag:\n"
2665 " yield node\n"
2666 " for node in node:\n"
2667 " for node in getiterator(node, tag):\n"
2668 " yield node\n"
2669#endif
2670
2671 "def parse(source, parser=None):\n" /* public */
2672 " tree = ElementTree()\n"
2673 " tree.parse(source, parser)\n"
2674 " return tree\n"
2675 "cElementTree.parse = parse\n"
2676
2677#if (PY_VERSION_HEX < 0x02020000)
2678 "if hasattr(ET, 'iterparse'):\n"
2679 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2680#else
2681 "class iterparse(object):\n"
2682 " root = None\n"
2683 " def __init__(self, file, events=None):\n"
2684 " if not hasattr(file, 'read'):\n"
2685 " file = open(file, 'rb')\n"
2686 " self._file = file\n"
2687 " self._events = events\n"
2688 " def __iter__(self):\n"
2689 " events = []\n"
2690 " b = cElementTree.TreeBuilder()\n"
2691 " p = cElementTree.XMLParser(b)\n"
2692 " p._setevents(events, self._events)\n"
2693 " while 1:\n"
2694 " data = self._file.read(16384)\n"
2695 " if not data:\n"
2696 " break\n"
2697 " p.feed(data)\n"
2698 " for event in events:\n"
2699 " yield event\n"
2700 " del events[:]\n"
2701 " root = p.close()\n"
2702 " for event in events:\n"
2703 " yield event\n"
2704 " self.root = root\n"
2705 "cElementTree.iterparse = iterparse\n"
2706#endif
2707
2708 "def PI(target, text=None):\n" /* public */
2709 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2710 " element.text = target\n"
2711 " if text:\n"
2712 " element.text = element.text + ' ' + text\n"
2713 " return element\n"
2714
2715 " elem = cElementTree.Element(ET.PI)\n"
2716 " elem.text = text\n"
2717 " return elem\n"
2718 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2719
2720 "def XML(text):\n" /* public */
2721 " parser = cElementTree.XMLParser()\n"
2722 " parser.feed(text)\n"
2723 " return parser.close()\n"
2724 "cElementTree.XML = cElementTree.fromstring = XML\n"
2725
2726 "def XMLID(text):\n" /* public */
2727 " tree = XML(text)\n"
2728 " ids = {}\n"
2729 " for elem in tree.getiterator():\n"
2730 " id = elem.get('id')\n"
2731 " if id:\n"
2732 " ids[id] = elem\n"
2733 " return tree, ids\n"
2734 "cElementTree.XMLID = XMLID\n"
2735
2736 "cElementTree.dump = ET.dump\n"
2737 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2738 "cElementTree.iselement = ET.iselement\n"
2739 "cElementTree.QName = ET.QName\n"
2740 "cElementTree.tostring = ET.tostring\n"
2741 "cElementTree.VERSION = '" VERSION "'\n"
2742 "cElementTree.__version__ = '" VERSION "'\n"
2743 "cElementTree.XMLParserError = SyntaxError\n"
2744
2745 );
2746
2747 PyRun_String(bootstrap, Py_file_input, g, NULL);
2748
2749 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2750
2751 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2752 if (elementtree_copyelement_obj) {
2753 /* reduce hack needed; enable reduce method */
2754 PyMethodDef* mp;
2755 for (mp = element_methods; mp->ml_name; mp++)
2756 if (mp->ml_meth == (PyCFunction) element_reduce) {
2757 mp->ml_name = "__reduce__";
2758 break;
2759 }
2760 } else
2761 PyErr_Clear();
2762 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2763 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2764
2765#if defined(USE_PYEXPAT_CAPI)
2766 /* link against pyexpat, if possible */
2767 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2768 if (capi &&
2769 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2770 capi->size <= sizeof(*expat_capi) &&
2771 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2772 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2773 capi->MICRO_VERSION == XML_MICRO_VERSION)
2774 expat_capi = capi;
2775 else
2776 expat_capi = NULL;
2777#endif
2778
2779}