blob: ab3ef23d9ab0fca8de29af2bdd472a85abbf93cb [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
3 * $Id: /work/modules/celementtree/cElementTree.c 1128 2005-12-16T21:57:13.668520Z Fredrik $
4 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
36 *
37 * Copyright (c) 1999-2005 by Secret Labs AB. All rights reserved.
38 * Copyright (c) 1999-2005 by Fredrik Lundh.
39 *
40 * info@pythonware.com
41 * http://www.pythonware.com
42 */
43
Fredrik Lundh6d52b552005-12-16 22:06:43 +000044/* Licensed to PSF under a Contributor Agreement. */
45/* See http://www.python.org/2.4/license for licensing details. */
46
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000047#include "Python.h"
48
49#define VERSION "1.0.5"
50
51/* -------------------------------------------------------------------- */
52/* configuration */
53
/* Leave defined to include the expat-based XMLParser type */
#define USE_EXPAT

/* Define to make all expat calls via pyexpat's embedded expat library */
/* #define USE_PYEXPAT_CAPI */

/* An element can hold this many children without extra memory
   allocations. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current version of cElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
73
74/* -------------------------------------------------------------------- */
75
/* Optional allocation tracing.  With the "#if 0" branch enabled, ALLOC
   and RELEASE keep a running byte total in 'memory' and print it along
   with the given comment; in the default branch both macros expand to
   nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
86
/* compiler tweaks */

/* LOCAL(type) declares a file-local helper returning 'type'; under
   MSVC the helper is additionally marked __inline and __fastcall. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
93
/* compatibility macros */

/* interpreters older than 2.5 get an int-based Py_ssize_t */
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#endif

/* map the *_CheckExact tests to the plain *_Check tests on older
   interpreters (dict before 2.4, list and string before 2.2) */
#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check
#if (PY_VERSION_HEX < 0x02020000)
#define PyList_CheckExact PyList_Check
#define PyString_CheckExact PyString_Check
#if (PY_VERSION_HEX >= 0x01060000)
#define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
#endif
#endif
#endif

#if (PY_VERSION_HEX >= 0x02050000)
#define PY_CONST const /* 2.5 adds const to some API:s */
#else
#define PY_CONST
#endif

/* provide Py_RETURN_NONE when the Python headers do not define it */
#if !defined(Py_RETURN_NONE)
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
118
/* macros used to store 'join' flags in string object pointers.  note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ.  see comments in the ElementObject definition for more
   info.

   JOIN_GET(p)       -- returns the flag stored in the lowest pointer bit
   JOIN_SET(p, flag) -- returns p with its flag bit cleared, or'ed with flag
   JOIN_OBJ(p)       -- returns p as a PyObject* with the flag bit cleared */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
126
/* glue functions (see the init function for details) */

/* helper callables; the functions below check the copy, deepcopy and
   getiterator helpers for NULL and raise RuntimeError when one is
   missing.  elementpath_obj is the object whose find/findtext/findall
   methods are called for tags that look like path expressions. */
static PyObject* elementtree_copyelement_obj;
static PyObject* elementtree_deepcopy_obj;
static PyObject* elementtree_getiterator_obj;
static PyObject* elementpath_obj;
132
133/* helpers */
134
135LOCAL(PyObject*)
136deepcopy(PyObject* object, PyObject* memo)
137{
138 /* do a deep copy of the given object */
139
140 PyObject* args;
141 PyObject* result;
142
143 if (!elementtree_deepcopy_obj) {
144 PyErr_SetString(
145 PyExc_RuntimeError,
146 "deepcopy helper not found"
147 );
148 return NULL;
149 }
150
151 args = PyTuple_New(2);
152 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
153 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
154
155 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
156
157 Py_DECREF(args);
158
159 return result;
160}
161
162LOCAL(PyObject*)
163list_join(PyObject* list)
164{
165 /* join list elements (destroying the list in the process) */
166
167 PyObject* joiner;
168 PyObject* function;
169 PyObject* args;
170 PyObject* result;
171
172 switch (PyList_GET_SIZE(list)) {
173 case 0:
174 Py_DECREF(list);
175 return PyString_FromString("");
176 case 1:
177 result = PyList_GET_ITEM(list, 0);
178 Py_INCREF(result);
179 Py_DECREF(list);
180 return result;
181 }
182
183 /* two or more elements: slice out a suitable separator from the
184 first member, and use that to join the entire list */
185
186 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
187 if (!joiner)
188 return NULL;
189
190 function = PyObject_GetAttrString(joiner, "join");
191 if (!function) {
192 Py_DECREF(joiner);
193 return NULL;
194 }
195
196 args = PyTuple_New(1);
197 PyTuple_SET_ITEM(args, 0, list);
198
199 result = PyObject_CallObject(function, args);
200
201 Py_DECREF(args); /* also removes list */
202 Py_DECREF(function);
203 Py_DECREF(joiner);
204
205 return result;
206}
207
208#if (PY_VERSION_HEX < 0x02020000)
209LOCAL(int)
210PyDict_Update(PyObject* dict, PyObject* other)
211{
212 /* PyDict_Update emulation for 2.1 and earlier */
213
214 PyObject* res;
215
216 res = PyObject_CallMethod(dict, "update", "O", other);
217 if (!res)
218 return -1;
219
220 Py_DECREF(res);
221 return 0;
222}
223#endif
224
225/* -------------------------------------------------------------------- */
226/* the element type */
227
/* optional part of an element, holding the attributes and children.
   allocated by element_new_extra, released by element_dealloc_extra */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    int length; /* actual number of items */
    int allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline storage for the first STATIC_CHILDREN children */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
243
/* the element object itself.  attributes and children live in the
   separately allocated 'extra' section */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children; NULL until element_new_extra has been
       called for this element */
    ElementObjectExtra* extra;

} ElementObject;
265
/* forward declaration of the element type object */
staticforward PyTypeObject Element_Type;

/* true if op is exactly of the element type (compares ob_type only) */
#define Element_CheckExact(op) ((op)->ob_type == &Element_Type)
269
270/* -------------------------------------------------------------------- */
271/* element constructor and destructor */
272
273LOCAL(int)
274element_new_extra(ElementObject* self, PyObject* attrib)
275{
276 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
277 if (!self->extra)
278 return -1;
279
280 if (!attrib)
281 attrib = Py_None;
282
283 Py_INCREF(attrib);
284 self->extra->attrib = attrib;
285
286 self->extra->length = 0;
287 self->extra->allocated = STATIC_CHILDREN;
288 self->extra->children = self->extra->_children;
289
290 return 0;
291}
292
293LOCAL(void)
294element_dealloc_extra(ElementObject* self)
295{
296 int i;
297
298 Py_DECREF(self->extra->attrib);
299
300 for (i = 0; i < self->extra->length; i++)
301 Py_DECREF(self->extra->children[i]);
302
303 if (self->extra->children != self->extra->_children)
304 PyObject_Free(self->extra->children);
305
306 PyObject_Free(self->extra);
307}
308
309LOCAL(PyObject*)
310element_new(PyObject* tag, PyObject* attrib)
311{
312 ElementObject* self;
313
314 self = PyObject_New(ElementObject, &Element_Type);
315 if (self == NULL)
316 return NULL;
317
318 /* use None for empty dictionaries */
319 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
320 attrib = Py_None;
321
322 self->extra = NULL;
323
324 if (attrib != Py_None) {
325
326 if (element_new_extra(self, attrib) < 0)
327 return NULL;
328
329 self->extra->length = 0;
330 self->extra->allocated = STATIC_CHILDREN;
331 self->extra->children = self->extra->_children;
332
333 }
334
335 Py_INCREF(tag);
336 self->tag = tag;
337
338 Py_INCREF(Py_None);
339 self->text = Py_None;
340
341 Py_INCREF(Py_None);
342 self->tail = Py_None;
343
344 ALLOC(sizeof(ElementObject), "create element");
345
346 return (PyObject*) self;
347}
348
349LOCAL(int)
350element_resize(ElementObject* self, int extra)
351{
352 int size;
353 PyObject* *children;
354
355 /* make sure self->children can hold the given number of extra
356 elements. set an exception and return -1 if allocation failed */
357
358 if (!self->extra)
359 element_new_extra(self, NULL);
360
361 size = self->extra->length + extra;
362
363 if (size > self->extra->allocated) {
364 /* use Python 2.4's list growth strategy */
365 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
366 if (self->extra->children != self->extra->_children) {
367 children = PyObject_Realloc(self->extra->children,
368 size * sizeof(PyObject*));
369 if (!children)
370 goto nomemory;
371 } else {
372 children = PyObject_Malloc(size * sizeof(PyObject*));
373 if (!children)
374 goto nomemory;
375 /* copy existing children from static area to malloc buffer */
376 memcpy(children, self->extra->children,
377 self->extra->length * sizeof(PyObject*));
378 }
379 self->extra->children = children;
380 self->extra->allocated = size;
381 }
382
383 return 0;
384
385 nomemory:
386 PyErr_NoMemory();
387 return -1;
388}
389
390LOCAL(int)
391element_add_subelement(ElementObject* self, PyObject* element)
392{
393 /* add a child element to a parent */
394
395 if (element_resize(self, 1) < 0)
396 return -1;
397
398 Py_INCREF(element);
399 self->extra->children[self->extra->length] = element;
400
401 self->extra->length++;
402
403 return 0;
404}
405
406LOCAL(PyObject*)
407element_get_attrib(ElementObject* self)
408{
409 /* return borrowed reference to attrib dictionary */
410 /* note: this function assumes that the extra section exists */
411
412 PyObject* res = self->extra->attrib;
413
414 if (res == Py_None) {
415 /* create missing dictionary */
416 res = PyDict_New();
417 if (!res)
418 return NULL;
419 self->extra->attrib = res;
420 }
421
422 return res;
423}
424
425LOCAL(PyObject*)
426element_get_text(ElementObject* self)
427{
428 /* return borrowed reference to text attribute */
429
430 PyObject* res = self->text;
431
432 if (JOIN_GET(res)) {
433 res = JOIN_OBJ(res);
434 if (PyList_CheckExact(res)) {
435 res = list_join(res);
436 if (!res)
437 return NULL;
438 self->text = res;
439 }
440 }
441
442 return res;
443}
444
445LOCAL(PyObject*)
446element_get_tail(ElementObject* self)
447{
448 /* return borrowed reference to text attribute */
449
450 PyObject* res = self->tail;
451
452 if (JOIN_GET(res)) {
453 res = JOIN_OBJ(res);
454 if (PyList_CheckExact(res)) {
455 res = list_join(res);
456 if (!res)
457 return NULL;
458 self->tail = res;
459 }
460 }
461
462 return res;
463}
464
465static PyObject*
466element(PyObject* self, PyObject* args, PyObject* kw)
467{
468 PyObject* elem;
469
470 PyObject* tag;
471 PyObject* attrib = NULL;
472 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
473 &PyDict_Type, &attrib))
474 return NULL;
475
476 if (attrib || kw) {
477 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
478 if (!attrib)
479 return NULL;
480 if (kw)
481 PyDict_Update(attrib, kw);
482 } else {
483 Py_INCREF(Py_None);
484 attrib = Py_None;
485 }
486
487 elem = element_new(tag, attrib);
488
489 Py_DECREF(attrib);
490
491 return elem;
492}
493
494static PyObject*
495subelement(PyObject* self, PyObject* args, PyObject* kw)
496{
497 PyObject* elem;
498
499 ElementObject* parent;
500 PyObject* tag;
501 PyObject* attrib = NULL;
502 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
503 &Element_Type, &parent, &tag,
504 &PyDict_Type, &attrib))
505 return NULL;
506
507 if (attrib || kw) {
508 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
509 if (!attrib)
510 return NULL;
511 if (kw)
512 PyDict_Update(attrib, kw);
513 } else {
514 Py_INCREF(Py_None);
515 attrib = Py_None;
516 }
517
518 elem = element_new(tag, attrib);
519
520 Py_DECREF(attrib);
521
522 if (element_add_subelement(parent, elem) < 0)
523 return NULL;
524
525 return elem;
526}
527
528static void
529element_dealloc(ElementObject* self)
530{
531 if (self->extra)
532 element_dealloc_extra(self);
533
534 /* discard attributes */
535 Py_DECREF(self->tag);
536 Py_DECREF(JOIN_OBJ(self->text));
537 Py_DECREF(JOIN_OBJ(self->tail));
538
539 RELEASE(sizeof(ElementObject), "destroy element");
540
541 PyObject_Del(self);
542}
543
544/* -------------------------------------------------------------------- */
545/* methods (in alphabetical order) */
546
547static PyObject*
548element_append(ElementObject* self, PyObject* args)
549{
550 PyObject* element;
551 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
552 return NULL;
553
554 if (element_add_subelement(self, element) < 0)
555 return NULL;
556
557 Py_RETURN_NONE;
558}
559
560static PyObject*
561element_clear(ElementObject* self, PyObject* args)
562{
563 if (!PyArg_ParseTuple(args, ":clear"))
564 return NULL;
565
566 if (self->extra) {
567 element_dealloc_extra(self);
568 self->extra = NULL;
569 }
570
571 Py_INCREF(Py_None);
572 Py_DECREF(JOIN_OBJ(self->text));
573 self->text = Py_None;
574
575 Py_INCREF(Py_None);
576 Py_DECREF(JOIN_OBJ(self->tail));
577 self->tail = Py_None;
578
579 Py_RETURN_NONE;
580}
581
582static PyObject*
583element_copy(ElementObject* self, PyObject* args)
584{
585 int i;
586 ElementObject* element;
587
588 if (!PyArg_ParseTuple(args, ":__copy__"))
589 return NULL;
590
591 element = (ElementObject*) element_new(
592 self->tag, (self->extra) ? self->extra->attrib : Py_None
593 );
594 if (!element)
595 return NULL;
596
597 Py_DECREF(JOIN_OBJ(element->text));
598 element->text = self->text;
599 Py_INCREF(JOIN_OBJ(element->text));
600
601 Py_DECREF(JOIN_OBJ(element->tail));
602 element->tail = self->tail;
603 Py_INCREF(JOIN_OBJ(element->tail));
604
605 if (self->extra) {
606
607 if (element_resize(element, self->extra->length) < 0)
608 return NULL;
609
610 for (i = 0; i < self->extra->length; i++) {
611 Py_INCREF(self->extra->children[i]);
612 element->extra->children[i] = self->extra->children[i];
613 }
614
615 element->extra->length = self->extra->length;
616
617 }
618
619 return (PyObject*) element;
620}
621
622static PyObject*
623element_deepcopy(ElementObject* self, PyObject* args)
624{
625 int i;
626 ElementObject* element;
627 PyObject* tag;
628 PyObject* attrib;
629 PyObject* text;
630 PyObject* tail;
631 PyObject* id;
632
633 PyObject* memo;
634 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
635 return NULL;
636
637 tag = deepcopy(self->tag, memo);
638 if (!tag)
639 return NULL;
640
641 if (self->extra) {
642 attrib = deepcopy(self->extra->attrib, memo);
643 if (!attrib) {
644 Py_DECREF(tag);
645 return NULL;
646 }
647 } else {
648 Py_INCREF(Py_None);
649 attrib = Py_None;
650 }
651
652 element = (ElementObject*) element_new(tag, attrib);
653
654 Py_DECREF(tag);
655 Py_DECREF(attrib);
656
657 if (!element)
658 return NULL;
659
660 text = deepcopy(JOIN_OBJ(self->text), memo);
661 if (!text)
662 goto error;
663 Py_DECREF(element->text);
664 element->text = JOIN_SET(text, JOIN_GET(self->text));
665
666 tail = deepcopy(JOIN_OBJ(self->tail), memo);
667 if (!tail)
668 goto error;
669 Py_DECREF(element->tail);
670 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
671
672 if (self->extra) {
673
674 if (element_resize(element, self->extra->length) < 0)
675 goto error;
676
677 for (i = 0; i < self->extra->length; i++) {
678 PyObject* child = deepcopy(self->extra->children[i], memo);
679 if (!child) {
680 element->extra->length = i;
681 goto error;
682 }
683 element->extra->children[i] = child;
684 }
685
686 element->extra->length = self->extra->length;
687
688 }
689
690 /* add object to memo dictionary (so deepcopy won't visit it again) */
691 id = PyInt_FromLong((Py_uintptr_t) self);
692
693 i = PyDict_SetItem(memo, id, (PyObject*) element);
694
695 Py_DECREF(id);
696
697 if (i < 0)
698 goto error;
699
700 return (PyObject*) element;
701
702 error:
703 Py_DECREF(element);
704 return NULL;
705}
706
707LOCAL(int)
708checkpath(PyObject* tag)
709{
710 int i, check = 1;
711
712 /* check if a tag contains an xpath character */
713
714#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
715
716#if defined(Py_USING_UNICODE)
717 if (PyUnicode_Check(tag)) {
718 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
719 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
720 if (p[i] == '{')
721 check = 0;
722 else if (p[i] == '}')
723 check = 1;
724 else if (check && PATHCHAR(p[i]))
725 return 1;
726 }
727 return 0;
728 }
729#endif
730 if (PyString_Check(tag)) {
731 char *p = PyString_AS_STRING(tag);
732 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
733 if (p[i] == '{')
734 check = 0;
735 else if (p[i] == '}')
736 check = 1;
737 else if (check && PATHCHAR(p[i]))
738 return 1;
739 }
740 return 0;
741 }
742
743 return 1; /* unknown type; might be path expression */
744}
745
746static PyObject*
747element_find(ElementObject* self, PyObject* args)
748{
749 int i;
750
751 PyObject* tag;
752 if (!PyArg_ParseTuple(args, "O:find", &tag))
753 return NULL;
754
755 if (checkpath(tag))
756 return PyObject_CallMethod(
757 elementpath_obj, "find", "OO", self, tag
758 );
759
760 if (!self->extra)
761 Py_RETURN_NONE;
762
763 for (i = 0; i < self->extra->length; i++) {
764 PyObject* item = self->extra->children[i];
765 if (Element_CheckExact(item) &&
766 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
767 Py_INCREF(item);
768 return item;
769 }
770 }
771
772 Py_RETURN_NONE;
773}
774
775static PyObject*
776element_findtext(ElementObject* self, PyObject* args)
777{
778 int i;
779
780 PyObject* tag;
781 PyObject* default_value = Py_None;
782 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
783 return NULL;
784
785 if (checkpath(tag))
786 return PyObject_CallMethod(
787 elementpath_obj, "findtext", "OOO", self, tag, default_value
788 );
789
790 if (!self->extra) {
791 Py_INCREF(default_value);
792 return default_value;
793 }
794
795 for (i = 0; i < self->extra->length; i++) {
796 ElementObject* item = (ElementObject*) self->extra->children[i];
797 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
798 PyObject* text = element_get_text(item);
799 if (text == Py_None)
800 return PyString_FromString("");
801 Py_INCREF(text);
802 return text;
803 }
804 }
805
806 Py_INCREF(default_value);
807 return default_value;
808}
809
810static PyObject*
811element_findall(ElementObject* self, PyObject* args)
812{
813 int i;
814 PyObject* out;
815
816 PyObject* tag;
817 if (!PyArg_ParseTuple(args, "O:findall", &tag))
818 return NULL;
819
820 if (checkpath(tag))
821 return PyObject_CallMethod(
822 elementpath_obj, "findall", "OO", self, tag
823 );
824
825 out = PyList_New(0);
826 if (!out)
827 return NULL;
828
829 if (!self->extra)
830 return out;
831
832 for (i = 0; i < self->extra->length; i++) {
833 PyObject* item = self->extra->children[i];
834 if (Element_CheckExact(item) &&
835 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
836 if (PyList_Append(out, item) < 0) {
837 Py_DECREF(out);
838 return NULL;
839 }
840 }
841 }
842
843 return out;
844}
845
846static PyObject*
847element_get(ElementObject* self, PyObject* args)
848{
849 PyObject* value;
850
851 PyObject* key;
852 PyObject* default_value = Py_None;
853 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
854 return NULL;
855
856 if (!self->extra || self->extra->attrib == Py_None)
857 value = default_value;
858 else {
859 value = PyDict_GetItem(self->extra->attrib, key);
860 if (!value)
861 value = default_value;
862 }
863
864 Py_INCREF(value);
865 return value;
866}
867
868static PyObject*
869element_getchildren(ElementObject* self, PyObject* args)
870{
871 int i;
872 PyObject* list;
873
874 if (!PyArg_ParseTuple(args, ":getchildren"))
875 return NULL;
876
877 if (!self->extra)
878 return PyList_New(0);
879
880 list = PyList_New(self->extra->length);
881 if (!list)
882 return NULL;
883
884 for (i = 0; i < self->extra->length; i++) {
885 PyObject* item = self->extra->children[i];
886 Py_INCREF(item);
887 PyList_SET_ITEM(list, i, item);
888 }
889
890 return list;
891}
892
893static PyObject*
894element_getiterator(ElementObject* self, PyObject* args)
895{
896 PyObject* result;
897
898 PyObject* tag = Py_None;
899 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
900 return NULL;
901
902 if (!elementtree_getiterator_obj) {
903 PyErr_SetString(
904 PyExc_RuntimeError,
905 "getiterator helper not found"
906 );
907 return NULL;
908 }
909
910 args = PyTuple_New(2);
Neal Norwitz02876df2006-02-07 06:58:52 +0000911 if (args == NULL)
912 return NULL;
913
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000914 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
915 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
916
917 result = PyObject_CallObject(elementtree_getiterator_obj, args);
918
919 Py_DECREF(args);
920
921 return result;
922}
923
924static PyObject*
Martin v. Löwis18e16552006-02-15 17:27:45 +0000925element_getitem(PyObject* _self, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000926{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000927 ElementObject* self = (ElementObject*)_self;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000928 if (!self->extra || index < 0 || index >= self->extra->length) {
929 PyErr_SetString(
930 PyExc_IndexError,
931 "child index out of range"
932 );
933 return NULL;
934 }
935
936 Py_INCREF(self->extra->children[index]);
937 return self->extra->children[index];
938}
939
940static PyObject*
Martin v. Löwis18e16552006-02-15 17:27:45 +0000941element_getslice(PyObject* _self, Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000942{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000943 ElementObject* self = (ElementObject*)_self;
944 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000945 PyObject* list;
946
947 if (!self->extra)
948 return PyList_New(0);
949
950 /* standard clamping */
951 if (start < 0)
952 start = 0;
953 if (end < 0)
954 end = 0;
955 if (end > self->extra->length)
956 end = self->extra->length;
957 if (start > end)
958 start = end;
959
960 list = PyList_New(end - start);
961 if (!list)
962 return NULL;
963
964 for (i = start; i < end; i++) {
965 PyObject* item = self->extra->children[i];
966 Py_INCREF(item);
967 PyList_SET_ITEM(list, i - start, item);
968 }
969
970 return list;
971}
972
973static PyObject*
974element_insert(ElementObject* self, PyObject* args)
975{
976 int i;
977
978 int index;
979 PyObject* element;
980 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
981 &Element_Type, &element))
982 return NULL;
983
984 if (!self->extra)
985 element_new_extra(self, NULL);
986
987 if (index < 0)
988 index = 0;
989 if (index > self->extra->length)
990 index = self->extra->length;
991
992 if (element_resize(self, 1) < 0)
993 return NULL;
994
995 for (i = self->extra->length; i > index; i--)
996 self->extra->children[i] = self->extra->children[i-1];
997
998 Py_INCREF(element);
999 self->extra->children[index] = element;
1000
1001 self->extra->length++;
1002
1003 Py_RETURN_NONE;
1004}
1005
1006static PyObject*
1007element_items(ElementObject* self, PyObject* args)
1008{
1009 if (!PyArg_ParseTuple(args, ":items"))
1010 return NULL;
1011
1012 if (!self->extra || self->extra->attrib == Py_None)
1013 return PyList_New(0);
1014
1015 return PyDict_Items(self->extra->attrib);
1016}
1017
1018static PyObject*
1019element_keys(ElementObject* self, PyObject* args)
1020{
1021 if (!PyArg_ParseTuple(args, ":keys"))
1022 return NULL;
1023
1024 if (!self->extra || self->extra->attrib == Py_None)
1025 return PyList_New(0);
1026
1027 return PyDict_Keys(self->extra->attrib);
1028}
1029
Martin v. Löwis18e16552006-02-15 17:27:45 +00001030static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001031element_length(ElementObject* self)
1032{
1033 if (!self->extra)
1034 return 0;
1035
1036 return self->extra->length;
1037}
1038
1039static PyObject*
1040element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1041{
1042 PyObject* elem;
1043
1044 PyObject* tag;
1045 PyObject* attrib;
1046 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1047 return NULL;
1048
1049 attrib = PyDict_Copy(attrib);
1050 if (!attrib)
1051 return NULL;
1052
1053 elem = element_new(tag, attrib);
1054
1055 Py_DECREF(attrib);
1056
1057 return elem;
1058}
1059
1060static PyObject*
1061element_reduce(ElementObject* self, PyObject* args)
1062{
1063 if (!PyArg_ParseTuple(args, ":__reduce__"))
1064 return NULL;
1065
1066 /* Hack alert: This method is used to work around a __copy__
1067 problem on certain 2.3 and 2.4 versions. To save time and
1068 simplify the code, we create the copy in here, and use a dummy
1069 copyelement helper to trick the copy module into doing the
1070 right thing. */
1071
1072 if (!elementtree_copyelement_obj) {
1073 PyErr_SetString(
1074 PyExc_RuntimeError,
1075 "copyelement helper not found"
1076 );
1077 return NULL;
1078 }
1079
1080 return Py_BuildValue(
1081 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1082 );
1083}
1084
1085static PyObject*
1086element_remove(ElementObject* self, PyObject* args)
1087{
1088 int i;
1089
1090 PyObject* element;
1091 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1092 return NULL;
1093
1094 if (!self->extra) {
1095 /* element has no children, so raise exception */
1096 PyErr_SetString(
1097 PyExc_ValueError,
1098 "list.remove(x): x not in list"
1099 );
1100 return NULL;
1101 }
1102
1103 for (i = 0; i < self->extra->length; i++) {
1104 if (self->extra->children[i] == element)
1105 break;
1106 if (PyObject_Compare(self->extra->children[i], element) == 0)
1107 break;
1108 }
1109
1110 if (i == self->extra->length) {
1111 /* element is not in children, so raise exception */
1112 PyErr_SetString(
1113 PyExc_ValueError,
1114 "list.remove(x): x not in list"
1115 );
1116 return NULL;
1117 }
1118
1119 Py_DECREF(self->extra->children[i]);
1120
1121 self->extra->length--;
1122
1123 for (; i < self->extra->length; i++)
1124 self->extra->children[i] = self->extra->children[i+1];
1125
1126 Py_RETURN_NONE;
1127}
1128
1129static PyObject*
1130element_repr(ElementObject* self)
1131{
1132 PyObject* repr;
1133 char buffer[100];
1134
1135 repr = PyString_FromString("<Element ");
1136
1137 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag));
1138
1139 sprintf(buffer, " at %p>", self);
1140 PyString_ConcatAndDel(&repr, PyString_FromString(buffer));
1141
1142 return repr;
1143}
1144
1145static PyObject*
1146element_set(ElementObject* self, PyObject* args)
1147{
1148 PyObject* attrib;
1149
1150 PyObject* key;
1151 PyObject* value;
1152 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1153 return NULL;
1154
1155 if (!self->extra)
1156 element_new_extra(self, NULL);
1157
1158 attrib = element_get_attrib(self);
1159 if (!attrib)
1160 return NULL;
1161
1162 if (PyDict_SetItem(attrib, key, value) < 0)
1163 return NULL;
1164
1165 Py_RETURN_NONE;
1166}
1167
1168static int
Martin v. Löwis18e16552006-02-15 17:27:45 +00001169element_setslice(PyObject* _self, Py_ssize_t start, Py_ssize_t end, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001170{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001171 ElementObject* self = (ElementObject*)_self;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001172 int i, new, old;
1173 PyObject* recycle = NULL;
1174
1175 if (!self->extra)
1176 element_new_extra(self, NULL);
1177
1178 /* standard clamping */
1179 if (start < 0)
1180 start = 0;
1181 if (end < 0)
1182 end = 0;
1183 if (end > self->extra->length)
1184 end = self->extra->length;
1185 if (start > end)
1186 start = end;
1187
1188 old = end - start;
1189
1190 if (item == NULL)
1191 new = 0;
1192 else if (PyList_CheckExact(item)) {
1193 new = PyList_GET_SIZE(item);
1194 } else {
1195 /* FIXME: support arbitrary sequences? */
1196 PyErr_Format(
1197 PyExc_TypeError,
1198 "expected list, not \"%.200s\"", item->ob_type->tp_name
1199 );
1200 return -1;
1201 }
1202
1203 if (old > 0) {
1204 /* to avoid recursive calls to this method (via decref), move
1205 old items to the recycle bin here, and get rid of them when
1206 we're done modifying the element */
1207 recycle = PyList_New(old);
1208 for (i = 0; i < old; i++)
1209 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1210 }
1211
1212 if (new < old) {
1213 /* delete slice */
1214 for (i = end; i < self->extra->length; i++)
1215 self->extra->children[i + new - old] = self->extra->children[i];
1216 } else if (new > old) {
1217 /* insert slice */
1218 if (element_resize(self, new - old) < 0)
1219 return -1;
1220 for (i = self->extra->length-1; i >= end; i--)
1221 self->extra->children[i + new - old] = self->extra->children[i];
1222 }
1223
1224 /* replace the slice */
1225 for (i = 0; i < new; i++) {
1226 PyObject* element = PyList_GET_ITEM(item, i);
1227 Py_INCREF(element);
1228 self->extra->children[i + start] = element;
1229 }
1230
1231 self->extra->length += new - old;
1232
1233 /* discard the recycle bin, and everything in it */
1234 Py_XDECREF(recycle);
1235
1236 return 0;
1237}
1238
1239static int
Martin v. Löwis18e16552006-02-15 17:27:45 +00001240element_setitem(PyObject* _self, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001241{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001242 ElementObject* self = (ElementObject*)_self;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001243 int i;
1244 PyObject* old;
1245
1246 if (!self->extra || index < 0 || index >= self->extra->length) {
1247 PyErr_SetString(
1248 PyExc_IndexError,
1249 "child assignment index out of range");
1250 return -1;
1251 }
1252
1253 old = self->extra->children[index];
1254
1255 if (item) {
1256 Py_INCREF(item);
1257 self->extra->children[index] = item;
1258 } else {
1259 self->extra->length--;
1260 for (i = index; i < self->extra->length; i++)
1261 self->extra->children[i] = self->extra->children[i+1];
1262 }
1263
1264 Py_DECREF(old);
1265
1266 return 0;
1267}
1268
1269static PyMethodDef element_methods[] = {
1270
1271 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1272
1273 {"get", (PyCFunction) element_get, METH_VARARGS},
1274 {"set", (PyCFunction) element_set, METH_VARARGS},
1275
1276 {"find", (PyCFunction) element_find, METH_VARARGS},
1277 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1278 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1279
1280 {"append", (PyCFunction) element_append, METH_VARARGS},
1281 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1282 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1283
1284 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1285 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1286
1287 {"items", (PyCFunction) element_items, METH_VARARGS},
1288 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1289
1290 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1291
1292 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1293 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1294
1295 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1296 C objects correctly, so we have to fake it using a __reduce__-
1297 based hack (see the element_reduce implementation above for
1298 details). */
1299
1300 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1301 using a runtime test to figure out if we need to fake things
1302 or now (see the init code below). The following entry is
1303 enabled only if the hack is needed. */
1304
1305 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1306
1307 {NULL, NULL}
1308};
1309
1310static PyObject*
1311element_getattr(ElementObject* self, char* name)
1312{
1313 PyObject* res;
1314
1315 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1316 if (res)
1317 return res;
1318
1319 PyErr_Clear();
1320
1321 if (strcmp(name, "tag") == 0)
1322 res = self->tag;
1323 else if (strcmp(name, "text") == 0)
1324 res = element_get_text(self);
1325 else if (strcmp(name, "tail") == 0) {
1326 res = element_get_tail(self);
1327 } else if (strcmp(name, "attrib") == 0) {
1328 if (!self->extra)
1329 element_new_extra(self, NULL);
1330 res = element_get_attrib(self);
1331 } else {
1332 PyErr_SetString(PyExc_AttributeError, name);
1333 return NULL;
1334 }
1335
1336 if (!res)
1337 return NULL;
1338
1339 Py_INCREF(res);
1340 return res;
1341}
1342
1343static int
1344element_setattr(ElementObject* self, const char* name, PyObject* value)
1345{
1346 if (value == NULL) {
1347 PyErr_SetString(
1348 PyExc_AttributeError,
1349 "can't delete element attributes"
1350 );
1351 return -1;
1352 }
1353
1354 if (strcmp(name, "tag") == 0) {
1355 Py_DECREF(self->tag);
1356 self->tag = value;
1357 Py_INCREF(self->tag);
1358 } else if (strcmp(name, "text") == 0) {
1359 Py_DECREF(JOIN_OBJ(self->text));
1360 self->text = value;
1361 Py_INCREF(self->text);
1362 } else if (strcmp(name, "tail") == 0) {
1363 Py_DECREF(JOIN_OBJ(self->tail));
1364 self->tail = value;
1365 Py_INCREF(self->tail);
1366 } else if (strcmp(name, "attrib") == 0) {
1367 if (!self->extra)
1368 element_new_extra(self, NULL);
1369 Py_DECREF(self->extra->attrib);
1370 self->extra->attrib = value;
1371 Py_INCREF(self->extra->attrib);
1372 } else {
1373 PyErr_SetString(PyExc_AttributeError, name);
1374 return -1;
1375 }
1376
1377 return 0;
1378}
1379
1380static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001381 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001382 0, /* sq_concat */
1383 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001384 element_getitem,
1385 element_getslice,
1386 element_setitem,
1387 element_setslice,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001388};
1389
1390statichere PyTypeObject Element_Type = {
1391 PyObject_HEAD_INIT(NULL)
1392 0, "Element", sizeof(ElementObject), 0,
1393 /* methods */
1394 (destructor)element_dealloc, /* tp_dealloc */
1395 0, /* tp_print */
1396 (getattrfunc)element_getattr, /* tp_getattr */
1397 (setattrfunc)element_setattr, /* tp_setattr */
1398 0, /* tp_compare */
1399 (reprfunc)element_repr, /* tp_repr */
1400 0, /* tp_as_number */
1401 &element_as_sequence, /* tp_as_sequence */
1402};
1403
1404/* ==================================================================== */
1405/* the tree builder type */
1406
1407typedef struct {
1408 PyObject_HEAD
1409
1410 PyObject* root; /* root node (first created node) */
1411
1412 ElementObject* this; /* current node */
1413 ElementObject* last; /* most recently created node */
1414
1415 PyObject* data; /* data collector (string or list), or NULL */
1416
1417 PyObject* stack; /* element stack */
1418 int index; /* current stack size (0=empty) */
1419
1420 /* element tracing */
1421 PyObject* events; /* list of events, or NULL if not collecting */
1422 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1423 PyObject* end_event_obj;
1424 PyObject* start_ns_event_obj;
1425 PyObject* end_ns_event_obj;
1426
1427} TreeBuilderObject;
1428
1429staticforward PyTypeObject TreeBuilder_Type;
1430
1431#define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type)
1432
1433/* -------------------------------------------------------------------- */
1434/* constructor and destructor */
1435
1436LOCAL(PyObject*)
1437treebuilder_new(void)
1438{
1439 TreeBuilderObject* self;
1440
1441 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1442 if (self == NULL)
1443 return NULL;
1444
1445 self->root = NULL;
1446
1447 Py_INCREF(Py_None);
1448 self->this = (ElementObject*) Py_None;
1449
1450 Py_INCREF(Py_None);
1451 self->last = (ElementObject*) Py_None;
1452
1453 self->data = NULL;
1454
1455 self->stack = PyList_New(20);
1456 self->index = 0;
1457
1458 self->events = NULL;
1459 self->start_event_obj = self->end_event_obj = NULL;
1460 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1461
1462 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1463
1464 return (PyObject*) self;
1465}
1466
1467static PyObject*
1468treebuilder(PyObject* _self, PyObject* args)
1469{
1470 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1471 return NULL;
1472
1473 return treebuilder_new();
1474}
1475
1476static void
1477treebuilder_dealloc(TreeBuilderObject* self)
1478{
1479 Py_XDECREF(self->end_ns_event_obj);
1480 Py_XDECREF(self->start_ns_event_obj);
1481 Py_XDECREF(self->end_event_obj);
1482 Py_XDECREF(self->start_event_obj);
1483 Py_XDECREF(self->events);
1484 Py_DECREF(self->stack);
1485 Py_XDECREF(self->data);
1486 Py_DECREF(self->last);
1487 Py_DECREF(self->this);
1488 Py_XDECREF(self->root);
1489
1490 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1491
1492 PyObject_Del(self);
1493}
1494
1495/* -------------------------------------------------------------------- */
1496/* handlers */
1497
1498LOCAL(PyObject*)
1499treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1500 PyObject* standalone)
1501{
1502 Py_RETURN_NONE;
1503}
1504
1505LOCAL(PyObject*)
1506treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1507 PyObject* attrib)
1508{
1509 PyObject* node;
1510 PyObject* this;
1511
1512 if (self->data) {
1513 if (self->this == self->last) {
Fredrik Lundh0149e3a2005-12-18 13:58:25 +00001514 Py_DECREF(self->last->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001515 self->last->text = JOIN_SET(
1516 self->data, PyList_CheckExact(self->data)
1517 );
1518 } else {
Fredrik Lundh0149e3a2005-12-18 13:58:25 +00001519 Py_DECREF(self->last->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001520 self->last->tail = JOIN_SET(
1521 self->data, PyList_CheckExact(self->data)
1522 );
1523 }
1524 self->data = NULL;
1525 }
1526
1527 node = element_new(tag, attrib);
1528 if (!node)
1529 return NULL;
1530
1531 this = (PyObject*) self->this;
1532
1533 if (this != Py_None) {
1534 if (element_add_subelement((ElementObject*) this, node) < 0)
1535 return NULL;
1536 } else {
1537 if (self->root) {
1538 PyErr_SetString(
1539 PyExc_SyntaxError,
1540 "multiple elements on top level"
1541 );
1542 return NULL;
1543 }
1544 Py_INCREF(node);
1545 self->root = node;
1546 }
1547
1548 if (self->index < PyList_GET_SIZE(self->stack)) {
1549 if (PyList_SetItem(self->stack, self->index, this) < 0)
1550 return NULL;
1551 Py_INCREF(this);
1552 } else {
1553 if (PyList_Append(self->stack, this) < 0)
1554 return NULL;
1555 }
1556 self->index++;
1557
1558 Py_DECREF(this);
1559 Py_INCREF(node);
1560 self->this = (ElementObject*) node;
1561
1562 Py_DECREF(self->last);
1563 Py_INCREF(node);
1564 self->last = (ElementObject*) node;
1565
1566 if (self->start_event_obj) {
1567 PyObject* res;
1568 PyObject* action = self->start_event_obj;
1569 res = PyTuple_New(2);
1570 if (res) {
1571 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1572 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1573 PyList_Append(self->events, res);
1574 Py_DECREF(res);
1575 } else
1576 PyErr_Clear(); /* FIXME: propagate error */
1577 }
1578
1579 return node;
1580}
1581
1582LOCAL(PyObject*)
1583treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1584{
1585 if (!self->data) {
1586 /* store the first item as is */
1587 Py_INCREF(data); self->data = data;
1588 } else {
1589 /* more than one item; use a list to collect items */
1590 if (PyString_CheckExact(self->data) && self->data->ob_refcnt == 1 &&
1591 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1592 /* expat often generates single character data sections; handle
1593 the most common case by resizing the existing string... */
1594 int size = PyString_GET_SIZE(self->data);
1595 if (_PyString_Resize(&self->data, size + 1) < 0)
1596 return NULL;
1597 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1598 } else if (PyList_CheckExact(self->data)) {
1599 if (PyList_Append(self->data, data) < 0)
1600 return NULL;
1601 } else {
1602 PyObject* list = PyList_New(2);
1603 if (!list)
1604 return NULL;
1605 PyList_SET_ITEM(list, 0, self->data);
1606 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1607 self->data = list;
1608 }
1609 }
1610
1611 Py_RETURN_NONE;
1612}
1613
1614LOCAL(PyObject*)
1615treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1616{
1617 PyObject* item;
1618
1619 if (self->data) {
1620 if (self->this == self->last) {
Fredrik Lundh0149e3a2005-12-18 13:58:25 +00001621 Py_DECREF(self->last->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001622 self->last->text = JOIN_SET(
1623 self->data, PyList_CheckExact(self->data)
1624 );
1625 } else {
Fredrik Lundh0149e3a2005-12-18 13:58:25 +00001626 Py_DECREF(self->last->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001627 self->last->tail = JOIN_SET(
1628 self->data, PyList_CheckExact(self->data)
1629 );
1630 }
1631 self->data = NULL;
1632 }
1633
1634 if (self->index == 0) {
1635 PyErr_SetString(
1636 PyExc_IndexError,
1637 "pop from empty stack"
1638 );
1639 return NULL;
1640 }
1641
1642 self->index--;
1643
1644 item = PyList_GET_ITEM(self->stack, self->index);
1645 Py_INCREF(item);
1646
1647 Py_DECREF(self->last);
1648
1649 self->last = (ElementObject*) self->this;
1650 self->this = (ElementObject*) item;
1651
1652 if (self->end_event_obj) {
1653 PyObject* res;
1654 PyObject* action = self->end_event_obj;
1655 PyObject* node = (PyObject*) self->last;
1656 res = PyTuple_New(2);
1657 if (res) {
1658 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1659 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1660 PyList_Append(self->events, res);
1661 Py_DECREF(res);
1662 } else
1663 PyErr_Clear(); /* FIXME: propagate error */
1664 }
1665
1666 Py_INCREF(self->last);
1667 return (PyObject*) self->last;
1668}
1669
1670LOCAL(void)
1671treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1672 const char* prefix, const char *uri)
1673{
1674 PyObject* res;
1675 PyObject* action;
1676 PyObject* parcel;
1677
1678 if (!self->events)
1679 return;
1680
1681 if (start) {
1682 if (!self->start_ns_event_obj)
1683 return;
1684 action = self->start_ns_event_obj;
1685 /* FIXME: prefix and uri use utf-8 encoding! */
1686 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1687 if (!parcel)
1688 return;
1689 Py_INCREF(action);
1690 } else {
1691 if (!self->end_ns_event_obj)
1692 return;
1693 action = self->end_ns_event_obj;
1694 Py_INCREF(action);
1695 parcel = Py_None;
1696 Py_INCREF(parcel);
1697 }
1698
1699 res = PyTuple_New(2);
1700
1701 if (res) {
1702 PyTuple_SET_ITEM(res, 0, action);
1703 PyTuple_SET_ITEM(res, 1, parcel);
1704 PyList_Append(self->events, res);
1705 Py_DECREF(res);
1706 } else
1707 PyErr_Clear(); /* FIXME: propagate error */
1708}
1709
1710/* -------------------------------------------------------------------- */
1711/* methods (in alphabetical order) */
1712
1713static PyObject*
1714treebuilder_data(TreeBuilderObject* self, PyObject* args)
1715{
1716 PyObject* data;
1717 if (!PyArg_ParseTuple(args, "O:data", &data))
1718 return NULL;
1719
1720 return treebuilder_handle_data(self, data);
1721}
1722
1723static PyObject*
1724treebuilder_end(TreeBuilderObject* self, PyObject* args)
1725{
1726 PyObject* tag;
1727 if (!PyArg_ParseTuple(args, "O:end", &tag))
1728 return NULL;
1729
1730 return treebuilder_handle_end(self, tag);
1731}
1732
1733LOCAL(PyObject*)
1734treebuilder_done(TreeBuilderObject* self)
1735{
1736 PyObject* res;
1737
1738 /* FIXME: check stack size? */
1739
1740 if (self->root)
1741 res = self->root;
1742 else
1743 res = Py_None;
1744
1745 Py_INCREF(res);
1746 return res;
1747}
1748
1749static PyObject*
1750treebuilder_close(TreeBuilderObject* self, PyObject* args)
1751{
1752 if (!PyArg_ParseTuple(args, ":close"))
1753 return NULL;
1754
1755 return treebuilder_done(self);
1756}
1757
1758static PyObject*
1759treebuilder_start(TreeBuilderObject* self, PyObject* args)
1760{
1761 PyObject* tag;
1762 PyObject* attrib = Py_None;
1763 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1764 return NULL;
1765
1766 return treebuilder_handle_start(self, tag, attrib);
1767}
1768
1769static PyObject*
1770treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1771{
1772 PyObject* encoding;
1773 PyObject* standalone;
1774 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1775 return NULL;
1776
1777 return treebuilder_handle_xml(self, encoding, standalone);
1778}
1779
1780static PyMethodDef treebuilder_methods[] = {
1781 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1782 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1783 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1784 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1785 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1786 {NULL, NULL}
1787};
1788
1789static PyObject*
1790treebuilder_getattr(TreeBuilderObject* self, char* name)
1791{
1792 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1793}
1794
1795statichere PyTypeObject TreeBuilder_Type = {
1796 PyObject_HEAD_INIT(NULL)
1797 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
1798 /* methods */
1799 (destructor)treebuilder_dealloc, /* tp_dealloc */
1800 0, /* tp_print */
1801 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1802};
1803
1804/* ==================================================================== */
1805/* the expat interface */
1806
1807#if defined(USE_EXPAT)
1808
1809#include "expat.h"
1810
1811#if defined(USE_PYEXPAT_CAPI)
1812#include "pyexpat.h"
1813static struct PyExpat_CAPI* expat_capi;
1814#define EXPAT(func) (expat_capi->func)
1815#else
1816#define EXPAT(func) (XML_##func)
1817#endif
1818
1819typedef struct {
1820 PyObject_HEAD
1821
1822 XML_Parser parser;
1823
1824 PyObject* target;
1825 PyObject* entity;
1826
1827 PyObject* names;
1828
1829 PyObject* handle_xml;
1830 PyObject* handle_start;
1831 PyObject* handle_data;
1832 PyObject* handle_end;
1833
1834 PyObject* handle_comment;
1835 PyObject* handle_pi;
1836
1837} XMLParserObject;
1838
1839staticforward PyTypeObject XMLParser_Type;
1840
1841/* helpers */
1842
#if defined(Py_USING_UNICODE)
LOCAL(int)
checkstring(const char* string, int size)
{
    /* scan an 8-bit string for bytes with the high bit set, which
       indicates UTF-8 encoded (non-ASCII) characters.  returns 1 if
       one is found, 0 otherwise */

    const char* p = string;
    int remaining = size;

    while (remaining-- > 0) {
        if (*p++ & 0x80)
            return 1;
    }

    return 0;
}
#endif
1857
1858LOCAL(PyObject*)
1859makestring(const char* string, int size)
1860{
1861 /* convert a UTF-8 string to either a 7-bit ascii string or a
1862 Unicode string */
1863
1864#if defined(Py_USING_UNICODE)
1865 if (checkstring(string, size))
1866 return PyUnicode_DecodeUTF8(string, size, "strict");
1867#endif
1868
1869 return PyString_FromStringAndSize(string, size);
1870}
1871
1872LOCAL(PyObject*)
1873makeuniversal(XMLParserObject* self, const char* string)
1874{
1875 /* convert a UTF-8 tag/attribute name from the expat parser
1876 to a universal name string */
1877
1878 int size = strlen(string);
1879 PyObject* key;
1880 PyObject* value;
1881
1882 /* look the 'raw' name up in the names dictionary */
1883 key = PyString_FromStringAndSize(string, size);
1884 if (!key)
1885 return NULL;
1886
1887 value = PyDict_GetItem(self->names, key);
1888
1889 if (value) {
1890 Py_INCREF(value);
1891 } else {
1892 /* new name. convert to universal name, and decode as
1893 necessary */
1894
1895 PyObject* tag;
1896 char* p;
1897 int i;
1898
1899 /* look for namespace separator */
1900 for (i = 0; i < size; i++)
1901 if (string[i] == '}')
1902 break;
1903 if (i != size) {
1904 /* convert to universal name */
1905 tag = PyString_FromStringAndSize(NULL, size+1);
1906 p = PyString_AS_STRING(tag);
1907 p[0] = '{';
1908 memcpy(p+1, string, size);
1909 size++;
1910 } else {
1911 /* plain name; use key as tag */
1912 Py_INCREF(key);
1913 tag = key;
1914 }
1915
1916 /* decode universal name */
1917#if defined(Py_USING_UNICODE)
1918 /* inline makestring, to avoid duplicating the source string if
1919 it's not an utf-8 string */
1920 p = PyString_AS_STRING(tag);
1921 if (checkstring(p, size)) {
1922 value = PyUnicode_DecodeUTF8(p, size, "strict");
1923 Py_DECREF(tag);
1924 if (!value) {
1925 Py_DECREF(key);
1926 return NULL;
1927 }
1928 } else
1929#endif
1930 value = tag; /* use tag as is */
1931
1932 /* add to names dictionary */
1933 if (PyDict_SetItem(self->names, key, value) < 0) {
1934 Py_DECREF(key);
1935 Py_DECREF(value);
1936 return NULL;
1937 }
1938 }
1939
1940 Py_DECREF(key);
1941 return value;
1942}
1943
1944/* -------------------------------------------------------------------- */
1945/* handlers */
1946
1947static void
1948expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1949 int data_len)
1950{
1951 PyObject* key;
1952 PyObject* value;
1953 PyObject* res;
1954
1955 if (data_len < 2 || data_in[0] != '&')
1956 return;
1957
1958 key = makestring(data_in + 1, data_len - 2);
1959 if (!key)
1960 return;
1961
1962 value = PyDict_GetItem(self->entity, key);
1963
1964 if (value) {
1965 if (TreeBuilder_CheckExact(self->target))
1966 res = treebuilder_handle_data(
1967 (TreeBuilderObject*) self->target, value
1968 );
1969 else if (self->handle_data)
1970 res = PyObject_CallFunction(self->handle_data, "O", value);
1971 else
1972 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001973 Py_XDECREF(res);
1974 } else {
1975 PyErr_Format(
1976 PyExc_SyntaxError, "undefined entity &%s;: line %d, column %d",
1977 PyString_AS_STRING(key),
1978 EXPAT(GetErrorLineNumber)(self->parser),
1979 EXPAT(GetErrorColumnNumber)(self->parser)
1980 );
1981 }
1982
1983 Py_DECREF(key);
1984}
1985
1986static void
1987expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
1988 const XML_Char **attrib_in)
1989{
1990 PyObject* res;
1991 PyObject* tag;
1992 PyObject* attrib;
1993 int ok;
1994
1995 /* tag name */
1996 tag = makeuniversal(self, tag_in);
1997 if (!tag)
1998 return; /* parser will look for errors */
1999
2000 /* attributes */
2001 if (attrib_in[0]) {
2002 attrib = PyDict_New();
2003 if (!attrib)
2004 return;
2005 while (attrib_in[0] && attrib_in[1]) {
2006 PyObject* key = makeuniversal(self, attrib_in[0]);
2007 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2008 if (!key || !value) {
2009 Py_XDECREF(value);
2010 Py_XDECREF(key);
2011 Py_DECREF(attrib);
2012 return;
2013 }
2014 ok = PyDict_SetItem(attrib, key, value);
2015 Py_DECREF(value);
2016 Py_DECREF(key);
2017 if (ok < 0) {
2018 Py_DECREF(attrib);
2019 return;
2020 }
2021 attrib_in += 2;
2022 }
2023 } else {
2024 Py_INCREF(Py_None);
2025 attrib = Py_None;
2026 }
2027
2028 if (TreeBuilder_CheckExact(self->target))
2029 /* shortcut */
2030 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2031 tag, attrib);
2032 else if (self->handle_start)
2033 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2034 else
2035 res = NULL;
2036
2037 Py_DECREF(tag);
2038 Py_DECREF(attrib);
2039
2040 Py_XDECREF(res);
2041}
2042
2043static void
2044expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2045 int data_len)
2046{
2047 PyObject* data;
2048 PyObject* res;
2049
2050 data = makestring(data_in, data_len);
2051
2052 if (TreeBuilder_CheckExact(self->target))
2053 /* shortcut */
2054 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2055 else if (self->handle_data)
2056 res = PyObject_CallFunction(self->handle_data, "O", data);
2057 else
2058 res = NULL;
2059
2060 Py_DECREF(data);
2061
2062 Py_XDECREF(res);
2063}
2064
2065static void
2066expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2067{
2068 PyObject* tag;
2069 PyObject* res = NULL;
2070
2071 if (TreeBuilder_CheckExact(self->target))
2072 /* shortcut */
2073 /* the standard tree builder doesn't look at the end tag */
2074 res = treebuilder_handle_end(
2075 (TreeBuilderObject*) self->target, Py_None
2076 );
2077 else if (self->handle_end) {
2078 tag = makeuniversal(self, tag_in);
2079 if (tag) {
2080 res = PyObject_CallFunction(self->handle_end, "O", tag);
2081 Py_DECREF(tag);
2082 }
2083 }
2084
2085 Py_XDECREF(res);
2086}
2087
2088static void
2089expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2090 const XML_Char *uri)
2091{
2092 treebuilder_handle_namespace(
2093 (TreeBuilderObject*) self->target, 1, prefix, uri
2094 );
2095}
2096
2097static void
2098expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2099{
2100 treebuilder_handle_namespace(
2101 (TreeBuilderObject*) self->target, 0, NULL, NULL
2102 );
2103}
2104
2105static void
2106expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2107{
2108 PyObject* comment;
2109 PyObject* res;
2110
2111 if (self->handle_comment) {
2112 comment = makestring(comment_in, strlen(comment_in));
2113 if (comment) {
2114 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2115 Py_XDECREF(res);
2116 Py_DECREF(comment);
2117 }
2118 }
2119}
2120
2121static void
2122expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2123 const XML_Char* data_in)
2124{
2125 PyObject* target;
2126 PyObject* data;
2127 PyObject* res;
2128
2129 if (self->handle_pi) {
2130 target = makestring(target_in, strlen(target_in));
2131 data = makestring(data_in, strlen(data_in));
2132 if (target && data) {
2133 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2134 Py_XDECREF(res);
2135 Py_DECREF(data);
2136 Py_DECREF(target);
2137 } else {
2138 Py_XDECREF(data);
2139 Py_XDECREF(target);
2140 }
2141 }
2142}
2143
#if defined(Py_USING_UNICODE)
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    /* build a byte-to-character map for an encoding expat does not
       know, by decoding all 256 byte values with the named Python
       codec.  returns XML_STATUS_OK, or XML_STATUS_ERROR if the codec
       fails or does not yield exactly one character per byte */

    unsigned char bytes[256];
    PyObject* decoded;
    Py_UNICODE* chars;
    int n;

    memset(info, 0, sizeof(XML_Encoding));

    /* the identity table 0..255 is the input to the codec */
    for (n = 0; n < 256; n++)
        bytes[n] = n;

    decoded = PyUnicode_Decode((char*) bytes, 256, name, "replace");
    if (decoded == NULL)
        return XML_STATUS_ERROR;

    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    /* bytes the codec replaced are entered as -1 in the map */
    for (n = 0; n < 256; n++) {
        if (chars[n] == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[n] = -1;
        else
            info->map[n] = chars[n];
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2182
2183/* -------------------------------------------------------------------- */
2184/* constructor and destructor */
2185
2186static PyObject*
2187xmlparser(PyObject* _self, PyObject* args, PyObject* kw)
2188{
2189 XMLParserObject* self;
2190 /* FIXME: does this need to be static? */
2191 static XML_Memory_Handling_Suite memory_handler;
2192
2193 PyObject* target = NULL;
2194 char* encoding = NULL;
2195 static PY_CONST char* kwlist[] = { "target", "encoding", NULL };
2196 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2197 &target, &encoding))
2198 return NULL;
2199
2200#if defined(USE_PYEXPAT_CAPI)
2201 if (!expat_capi) {
2202 PyErr_SetString(
2203 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2204 );
2205 return NULL;
2206 }
2207#endif
2208
2209 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2210 if (self == NULL)
2211 return NULL;
2212
2213 self->entity = PyDict_New();
2214 if (!self->entity) {
2215 PyObject_Del(self);
2216 return NULL; /* FIXME: cleanup on error */
2217 }
2218
2219 self->names = PyDict_New();
2220 if (!self->names) {
2221 PyObject_Del(self);
2222 return NULL; /* FIXME: cleanup on error */
2223 }
2224
2225 memory_handler.malloc_fcn = PyObject_Malloc;
2226 memory_handler.realloc_fcn = PyObject_Realloc;
2227 memory_handler.free_fcn = PyObject_Free;
2228
2229 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2230 if (!self->parser) {
2231 PyErr_NoMemory();
2232 return NULL; /* FIXME: cleanup on error */
2233 }
2234
2235 /* setup target handlers */
2236 if (!target) {
2237 target = treebuilder_new();
2238 if (!target) {
2239 PyObject_Del(self);
2240 return NULL; /* FIXME: cleanup on error */
2241 }
2242 } else
2243 Py_INCREF(target);
2244 self->target = target;
2245
2246 self->handle_xml = PyObject_GetAttrString(target, "xml");
2247 self->handle_start = PyObject_GetAttrString(target, "start");
2248 self->handle_data = PyObject_GetAttrString(target, "data");
2249 self->handle_end = PyObject_GetAttrString(target, "end");
2250 self->handle_comment = PyObject_GetAttrString(target, "comment");
2251 self->handle_pi = PyObject_GetAttrString(target, "pi");
2252
2253 PyErr_Clear();
2254
2255 /* configure parser */
2256 EXPAT(SetUserData)(self->parser, self);
2257 EXPAT(SetElementHandler)(
2258 self->parser,
2259 (XML_StartElementHandler) expat_start_handler,
2260 (XML_EndElementHandler) expat_end_handler
2261 );
2262 EXPAT(SetDefaultHandlerExpand)(
2263 self->parser,
2264 (XML_DefaultHandler) expat_default_handler
2265 );
2266 EXPAT(SetCharacterDataHandler)(
2267 self->parser,
2268 (XML_CharacterDataHandler) expat_data_handler
2269 );
2270 if (self->handle_comment)
2271 EXPAT(SetCommentHandler)(
2272 self->parser,
2273 (XML_CommentHandler) expat_comment_handler
2274 );
2275 if (self->handle_pi)
2276 EXPAT(SetProcessingInstructionHandler)(
2277 self->parser,
2278 (XML_ProcessingInstructionHandler) expat_pi_handler
2279 );
2280#if defined(Py_USING_UNICODE)
2281 EXPAT(SetUnknownEncodingHandler)(
2282 self->parser,
2283 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2284 );
2285#endif
2286
2287 ALLOC(sizeof(XMLParserObject), "create expatparser");
2288
2289 return (PyObject*) self;
2290}
2291
2292static void
2293xmlparser_dealloc(XMLParserObject* self)
2294{
2295 EXPAT(ParserFree)(self->parser);
2296
2297 Py_XDECREF(self->handle_pi);
2298 Py_XDECREF(self->handle_comment);
2299 Py_XDECREF(self->handle_end);
2300 Py_XDECREF(self->handle_data);
2301 Py_XDECREF(self->handle_start);
2302 Py_XDECREF(self->handle_xml);
2303
2304 Py_DECREF(self->target);
2305 Py_DECREF(self->entity);
2306 Py_DECREF(self->names);
2307
2308 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2309
2310 PyObject_Del(self);
2311}
2312
2313/* -------------------------------------------------------------------- */
2314/* methods (in alphabetical order) */
2315
2316LOCAL(PyObject*)
2317expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2318{
2319 int ok;
2320
2321 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2322
2323 if (PyErr_Occurred())
2324 return NULL;
2325
2326 if (!ok) {
2327 PyErr_Format(
2328 PyExc_SyntaxError, "%s: line %d, column %d",
2329 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2330 EXPAT(GetErrorLineNumber)(self->parser),
2331 EXPAT(GetErrorColumnNumber)(self->parser)
2332 );
2333 return NULL;
2334 }
2335
2336 Py_RETURN_NONE;
2337}
2338
2339static PyObject*
2340xmlparser_close(XMLParserObject* self, PyObject* args)
2341{
2342 /* end feeding data to parser */
2343
2344 PyObject* res;
2345 if (!PyArg_ParseTuple(args, ":close"))
2346 return NULL;
2347
2348 res = expat_parse(self, "", 0, 1);
2349
2350 if (res && TreeBuilder_CheckExact(self->target)) {
2351 Py_DECREF(res);
2352 return treebuilder_done((TreeBuilderObject*) self->target);
2353 }
2354
2355 return res;
2356}
2357
2358static PyObject*
2359xmlparser_feed(XMLParserObject* self, PyObject* args)
2360{
2361 /* feed data to parser */
2362
2363 char* data;
2364 int data_len;
2365 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2366 return NULL;
2367
2368 return expat_parse(self, data, data_len, 0);
2369}
2370
2371static PyObject*
2372xmlparser_parse(XMLParserObject* self, PyObject* args)
2373{
2374 /* (internal) parse until end of input stream */
2375
2376 PyObject* reader;
2377 PyObject* buffer;
2378 PyObject* res;
2379
2380 PyObject* fileobj;
2381 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2382 return NULL;
2383
2384 reader = PyObject_GetAttrString(fileobj, "read");
2385 if (!reader)
2386 return NULL;
2387
2388 /* read from open file object */
2389 for (;;) {
2390
2391 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2392
2393 if (!buffer) {
2394 /* read failed (e.g. due to KeyboardInterrupt) */
2395 Py_DECREF(reader);
2396 return NULL;
2397 }
2398
2399 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2400 Py_DECREF(buffer);
2401 break;
2402 }
2403
2404 res = expat_parse(
2405 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2406 );
2407
2408 Py_DECREF(buffer);
2409
2410 if (!res) {
2411 Py_DECREF(reader);
2412 return NULL;
2413 }
2414 Py_DECREF(res);
2415
2416 }
2417
2418 Py_DECREF(reader);
2419
2420 res = expat_parse(self, "", 0, 1);
2421
2422 if (res && TreeBuilder_CheckExact(self->target)) {
2423 Py_DECREF(res);
2424 return treebuilder_done((TreeBuilderObject*) self->target);
2425 }
2426
2427 return res;
2428}
2429
2430static PyObject*
2431xmlparser_setevents(XMLParserObject* self, PyObject* args)
2432{
2433 /* activate element event reporting */
2434
2435 int i;
2436 TreeBuilderObject* target;
2437
2438 PyObject* events; /* event collector */
2439 PyObject* event_set = Py_None;
2440 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2441 &event_set))
2442 return NULL;
2443
2444 if (!TreeBuilder_CheckExact(self->target)) {
2445 PyErr_SetString(
2446 PyExc_TypeError,
2447 "event handling only supported for cElementTree.Treebuilder "
2448 "targets"
2449 );
2450 return NULL;
2451 }
2452
2453 target = (TreeBuilderObject*) self->target;
2454
2455 Py_INCREF(events);
2456 Py_XDECREF(target->events);
2457 target->events = events;
2458
2459 /* clear out existing events */
2460 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2461 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2462 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2463 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2464
2465 if (event_set == Py_None) {
2466 /* default is "end" only */
2467 target->end_event_obj = PyString_FromString("end");
2468 Py_RETURN_NONE;
2469 }
2470
2471 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2472 goto error;
2473
2474 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2475 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2476 char* event;
2477 if (!PyString_Check(item))
2478 goto error;
2479 event = PyString_AS_STRING(item);
2480 if (strcmp(event, "start") == 0) {
2481 Py_INCREF(item);
2482 target->start_event_obj = item;
2483 } else if (strcmp(event, "end") == 0) {
2484 Py_INCREF(item);
2485 Py_XDECREF(target->end_event_obj);
2486 target->end_event_obj = item;
2487 } else if (strcmp(event, "start-ns") == 0) {
2488 Py_INCREF(item);
2489 Py_XDECREF(target->start_ns_event_obj);
2490 target->start_ns_event_obj = item;
2491 EXPAT(SetNamespaceDeclHandler)(
2492 self->parser,
2493 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2494 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2495 );
2496 } else if (strcmp(event, "end-ns") == 0) {
2497 Py_INCREF(item);
2498 Py_XDECREF(target->end_ns_event_obj);
2499 target->end_ns_event_obj = item;
2500 EXPAT(SetNamespaceDeclHandler)(
2501 self->parser,
2502 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2503 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2504 );
2505 } else {
2506 PyErr_Format(
2507 PyExc_ValueError,
2508 "unknown event '%s'", event
2509 );
2510 return NULL;
2511 }
2512 }
2513
2514 Py_RETURN_NONE;
2515
2516 error:
2517 PyErr_SetString(
2518 PyExc_TypeError,
2519 "invalid event tuple"
2520 );
2521 return NULL;
2522}
2523
2524static PyMethodDef xmlparser_methods[] = {
2525 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2526 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2527 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2528 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2529 {NULL, NULL}
2530};
2531
2532static PyObject*
2533xmlparser_getattr(XMLParserObject* self, char* name)
2534{
2535 PyObject* res;
2536
2537 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2538 if (res)
2539 return res;
2540
2541 PyErr_Clear();
2542
2543 if (strcmp(name, "entity") == 0)
2544 res = self->entity;
2545 else if (strcmp(name, "target") == 0)
2546 res = self->target;
2547 else if (strcmp(name, "version") == 0) {
2548 char buffer[100];
2549 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2550 XML_MINOR_VERSION, XML_MICRO_VERSION);
2551 return PyString_FromString(buffer);
2552 } else {
2553 PyErr_SetString(PyExc_AttributeError, name);
2554 return NULL;
2555 }
2556
2557 Py_INCREF(res);
2558 return res;
2559}
2560
2561statichere PyTypeObject XMLParser_Type = {
2562 PyObject_HEAD_INIT(NULL)
2563 0, "XMLParser", sizeof(XMLParserObject), 0,
2564 /* methods */
2565 (destructor)xmlparser_dealloc, /* tp_dealloc */
2566 0, /* tp_print */
2567 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2568};
2569
2570#endif
2571
2572/* ==================================================================== */
2573/* python module interface */
2574
2575static PyMethodDef _functions[] = {
2576 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2577 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2578 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2579#if defined(USE_EXPAT)
2580 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2581 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2582#endif
2583 {NULL, NULL}
2584};
2585
2586DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002587init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002588{
2589 PyObject* m;
2590 PyObject* g;
2591 char* bootstrap;
2592#if defined(USE_PYEXPAT_CAPI)
2593 struct PyExpat_CAPI* capi;
2594#endif
2595
2596 /* Patch object type */
2597 Element_Type.ob_type = TreeBuilder_Type.ob_type = &PyType_Type;
2598#if defined(USE_EXPAT)
2599 XMLParser_Type.ob_type = &PyType_Type;
2600#endif
2601
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002602 m = Py_InitModule("_elementtree", _functions);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002603 if (m == NULL)
2604 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002605
2606 /* python glue code */
2607
2608 g = PyDict_New();
Neal Norwitz02876df2006-02-07 06:58:52 +00002609 if (g == NULL)
2610 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002611
2612 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2613
2614 bootstrap = (
2615
2616#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2617 "from __future__ import generators\n" /* enable yield under 2.2 */
2618#endif
2619
2620 "from copy import copy, deepcopy\n"
2621
2622 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002623 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002624 "except ImportError:\n"
2625 " import ElementTree\n"
2626 "ET = ElementTree\n"
2627 "del ElementTree\n"
2628
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002629 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002630
2631 "try:\n" /* check if copy works as is */
2632 " copy(cElementTree.Element('x'))\n"
2633 "except:\n"
2634 " def copyelement(elem):\n"
2635 " return elem\n"
2636
2637 "def Comment(text=None):\n" /* public */
2638 " element = cElementTree.Element(ET.Comment)\n"
2639 " element.text = text\n"
2640 " return element\n"
2641 "cElementTree.Comment = Comment\n"
2642
2643 "class ElementTree(ET.ElementTree):\n" /* public */
2644 " def parse(self, source, parser=None):\n"
2645 " if not hasattr(source, 'read'):\n"
2646 " source = open(source, 'rb')\n"
2647 " if parser is not None:\n"
2648 " while 1:\n"
2649 " data = source.read(65536)\n"
2650 " if not data:\n"
2651 " break\n"
2652 " parser.feed(data)\n"
2653 " self._root = parser.close()\n"
2654 " else:\n"
2655 " parser = cElementTree.XMLParser()\n"
2656 " self._root = parser._parse(source)\n"
2657 " return self._root\n"
2658 "cElementTree.ElementTree = ElementTree\n"
2659
2660 "def getiterator(node, tag=None):\n" /* helper */
2661 " if tag == '*':\n"
2662 " tag = None\n"
2663#if (PY_VERSION_HEX < 0x02020000)
2664 " nodes = []\n" /* 2.1 doesn't have yield */
2665 " if tag is None or node.tag == tag:\n"
2666 " nodes.append(node)\n"
2667 " for node in node:\n"
2668 " nodes.extend(getiterator(node, tag))\n"
2669 " return nodes\n"
2670#else
2671 " if tag is None or node.tag == tag:\n"
2672 " yield node\n"
2673 " for node in node:\n"
2674 " for node in getiterator(node, tag):\n"
2675 " yield node\n"
2676#endif
2677
2678 "def parse(source, parser=None):\n" /* public */
2679 " tree = ElementTree()\n"
2680 " tree.parse(source, parser)\n"
2681 " return tree\n"
2682 "cElementTree.parse = parse\n"
2683
2684#if (PY_VERSION_HEX < 0x02020000)
2685 "if hasattr(ET, 'iterparse'):\n"
2686 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2687#else
2688 "class iterparse(object):\n"
2689 " root = None\n"
2690 " def __init__(self, file, events=None):\n"
2691 " if not hasattr(file, 'read'):\n"
2692 " file = open(file, 'rb')\n"
2693 " self._file = file\n"
2694 " self._events = events\n"
2695 " def __iter__(self):\n"
2696 " events = []\n"
2697 " b = cElementTree.TreeBuilder()\n"
2698 " p = cElementTree.XMLParser(b)\n"
2699 " p._setevents(events, self._events)\n"
2700 " while 1:\n"
2701 " data = self._file.read(16384)\n"
2702 " if not data:\n"
2703 " break\n"
2704 " p.feed(data)\n"
2705 " for event in events:\n"
2706 " yield event\n"
2707 " del events[:]\n"
2708 " root = p.close()\n"
2709 " for event in events:\n"
2710 " yield event\n"
2711 " self.root = root\n"
2712 "cElementTree.iterparse = iterparse\n"
2713#endif
2714
2715 "def PI(target, text=None):\n" /* public */
2716 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2717 " element.text = target\n"
2718 " if text:\n"
2719 " element.text = element.text + ' ' + text\n"
2720 " return element\n"
2721
2722 " elem = cElementTree.Element(ET.PI)\n"
2723 " elem.text = text\n"
2724 " return elem\n"
2725 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2726
2727 "def XML(text):\n" /* public */
2728 " parser = cElementTree.XMLParser()\n"
2729 " parser.feed(text)\n"
2730 " return parser.close()\n"
2731 "cElementTree.XML = cElementTree.fromstring = XML\n"
2732
2733 "def XMLID(text):\n" /* public */
2734 " tree = XML(text)\n"
2735 " ids = {}\n"
2736 " for elem in tree.getiterator():\n"
2737 " id = elem.get('id')\n"
2738 " if id:\n"
2739 " ids[id] = elem\n"
2740 " return tree, ids\n"
2741 "cElementTree.XMLID = XMLID\n"
2742
2743 "cElementTree.dump = ET.dump\n"
2744 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2745 "cElementTree.iselement = ET.iselement\n"
2746 "cElementTree.QName = ET.QName\n"
2747 "cElementTree.tostring = ET.tostring\n"
2748 "cElementTree.VERSION = '" VERSION "'\n"
2749 "cElementTree.__version__ = '" VERSION "'\n"
2750 "cElementTree.XMLParserError = SyntaxError\n"
2751
2752 );
2753
2754 PyRun_String(bootstrap, Py_file_input, g, NULL);
2755
2756 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2757
2758 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2759 if (elementtree_copyelement_obj) {
2760 /* reduce hack needed; enable reduce method */
2761 PyMethodDef* mp;
2762 for (mp = element_methods; mp->ml_name; mp++)
2763 if (mp->ml_meth == (PyCFunction) element_reduce) {
2764 mp->ml_name = "__reduce__";
2765 break;
2766 }
2767 } else
2768 PyErr_Clear();
2769 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2770 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2771
2772#if defined(USE_PYEXPAT_CAPI)
2773 /* link against pyexpat, if possible */
2774 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2775 if (capi &&
2776 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2777 capi->size <= sizeof(*expat_capi) &&
2778 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2779 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2780 capi->MICRO_VERSION == XML_MICRO_VERSION)
2781 expat_capi = capi;
2782 else
2783 expat_capi = NULL;
2784#endif
2785
2786}