blob: 514ed72a4ccf1f8b73f3677d69b2fd24b3f70ba5 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
3 * $Id: /work/modules/celementtree/cElementTree.c 1128 2005-12-16T21:57:13.668520Z Fredrik $
4 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
36 *
37 * Copyright (c) 1999-2005 by Secret Labs AB. All rights reserved.
38 * Copyright (c) 1999-2005 by Fredrik Lundh.
39 *
40 * info@pythonware.com
41 * http://www.pythonware.com
42 */
43
Fredrik Lundh6d52b552005-12-16 22:06:43 +000044/* Licensed to PSF under a Contributor Agreement. */
45/* See http://www.python.org/2.4/license for licensing details. */
46
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000047#include "Python.h"
48
49#define VERSION "1.0.5"
50
51/* -------------------------------------------------------------------- */
52/* configuration */
53
54/* Leave defined to include the expat-based XMLParser type */
55#define USE_EXPAT
56
/* Define to route all expat calls via pyexpat's embedded expat library */
58/* #define USE_PYEXPAT_CAPI */
59
60/* An element can hold this many children without extra memory
61 allocations. */
62#define STATIC_CHILDREN 4
63
/* For best performance, choose a value so that 80-90% of all nodes
65 have no more than the given number of children. Set this to zero
66 to minimize the size of the element structure itself (this only
67 helps if you have lots of leaf nodes with attributes). */
68
69/* Also note that pymalloc always allocates blocks in multiples of
70 eight bytes. For the current version of cElementTree, this means
71 that the number of children should be an even number, at least on
72 32-bit platforms. */
73
74/* -------------------------------------------------------------------- */
75
#if 1
/* normal build: the memory accounting hooks expand to nothing */
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#else
/* tracing build (change the test above to 0 to enable): keep a running
   byte count and print it, with the given comment, on every change */
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#endif
86
/* LOCAL(type) introduces a file-private helper returning `type`.  MSVC
   builds additionally ask for inlining and the fastcall convention. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
93
/* compatibility shims for older interpreters; each group is keyed on the
   first Python version that no longer needs it */

/* before 2.5: no Py_ssize_t */
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#endif

/* before 2.4: no exact dict check */
#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check
#endif

/* before 2.2: no exact list/string checks */
#if (PY_VERSION_HEX < 0x02020000)
#define PyList_CheckExact PyList_Check
#define PyString_CheckExact PyString_Check
#if (PY_VERSION_HEX >= 0x01060000)
#define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
#endif
#endif

/* supply Py_RETURN_NONE where the headers do not */
#if !defined(Py_RETURN_NONE)
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
112
/* The text and tail slots of an element keep a 'join' flag in the lowest
   bit of the stored pointer.  Never use such a pointer as an object
   directly; go through JOIN_OBJ first (see the comments in the
   ElementObject definition for more info).

   JOIN_OBJ(p)        the object pointer with the flag bit masked off
   JOIN_GET(p)        the flag bit (0 or 1)
   JOIN_SET(p, flag)  the pointer with its flag bit replaced by `flag` */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
120
121/* glue functions (see the init function for details) */
122static PyObject* elementtree_copyelement_obj;
123static PyObject* elementtree_deepcopy_obj;
124static PyObject* elementtree_getiterator_obj;
125static PyObject* elementpath_obj;
126
127/* helpers */
128
129LOCAL(PyObject*)
130deepcopy(PyObject* object, PyObject* memo)
131{
132 /* do a deep copy of the given object */
133
134 PyObject* args;
135 PyObject* result;
136
137 if (!elementtree_deepcopy_obj) {
138 PyErr_SetString(
139 PyExc_RuntimeError,
140 "deepcopy helper not found"
141 );
142 return NULL;
143 }
144
145 args = PyTuple_New(2);
146 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
147 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
148
149 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
150
151 Py_DECREF(args);
152
153 return result;
154}
155
156LOCAL(PyObject*)
157list_join(PyObject* list)
158{
159 /* join list elements (destroying the list in the process) */
160
161 PyObject* joiner;
162 PyObject* function;
163 PyObject* args;
164 PyObject* result;
165
166 switch (PyList_GET_SIZE(list)) {
167 case 0:
168 Py_DECREF(list);
169 return PyString_FromString("");
170 case 1:
171 result = PyList_GET_ITEM(list, 0);
172 Py_INCREF(result);
173 Py_DECREF(list);
174 return result;
175 }
176
177 /* two or more elements: slice out a suitable separator from the
178 first member, and use that to join the entire list */
179
180 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
181 if (!joiner)
182 return NULL;
183
184 function = PyObject_GetAttrString(joiner, "join");
185 if (!function) {
186 Py_DECREF(joiner);
187 return NULL;
188 }
189
190 args = PyTuple_New(1);
191 PyTuple_SET_ITEM(args, 0, list);
192
193 result = PyObject_CallObject(function, args);
194
195 Py_DECREF(args); /* also removes list */
196 Py_DECREF(function);
197 Py_DECREF(joiner);
198
199 return result;
200}
201
202#if (PY_VERSION_HEX < 0x02020000)
203LOCAL(int)
204PyDict_Update(PyObject* dict, PyObject* other)
205{
206 /* PyDict_Update emulation for 2.1 and earlier */
207
208 PyObject* res;
209
210 res = PyObject_CallMethod(dict, "update", "O", other);
211 if (!res)
212 return -1;
213
214 Py_DECREF(res);
215 return 0;
216}
217#endif
218
219/* -------------------------------------------------------------------- */
220/* the element type */
221
222typedef struct {
223
224 /* attributes (a dictionary object), or None if no attributes */
225 PyObject* attrib;
226
227 /* child elements */
228 int length; /* actual number of items */
229 int allocated; /* allocated items */
230
231 /* this either points to _children or to a malloced buffer */
232 PyObject* *children;
233
234 PyObject* _children[STATIC_CHILDREN];
235
236} ElementObjectExtra;
237
238typedef struct {
239 PyObject_HEAD
240
241 /* element tag (a string). */
242 PyObject* tag;
243
244 /* text before first child. note that this is a tagged pointer;
245 use JOIN_OBJ to get the object pointer. the join flag is used
246 to distinguish lists created by the tree builder from lists
247 assigned to the attribute by application code; the former
248 should be joined before being returned to the user, the latter
249 should be left intact. */
250 PyObject* text;
251
252 /* text after this element, in parent. note that this is a tagged
253 pointer; use JOIN_OBJ to get the object pointer. */
254 PyObject* tail;
255
256 ElementObjectExtra* extra;
257
258} ElementObject;
259
260staticforward PyTypeObject Element_Type;
261
262#define Element_CheckExact(op) ((op)->ob_type == &Element_Type)
263
264/* -------------------------------------------------------------------- */
265/* element constructor and destructor */
266
267LOCAL(int)
268element_new_extra(ElementObject* self, PyObject* attrib)
269{
270 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
271 if (!self->extra)
272 return -1;
273
274 if (!attrib)
275 attrib = Py_None;
276
277 Py_INCREF(attrib);
278 self->extra->attrib = attrib;
279
280 self->extra->length = 0;
281 self->extra->allocated = STATIC_CHILDREN;
282 self->extra->children = self->extra->_children;
283
284 return 0;
285}
286
287LOCAL(void)
288element_dealloc_extra(ElementObject* self)
289{
290 int i;
291
292 Py_DECREF(self->extra->attrib);
293
294 for (i = 0; i < self->extra->length; i++)
295 Py_DECREF(self->extra->children[i]);
296
297 if (self->extra->children != self->extra->_children)
298 PyObject_Free(self->extra->children);
299
300 PyObject_Free(self->extra);
301}
302
303LOCAL(PyObject*)
304element_new(PyObject* tag, PyObject* attrib)
305{
306 ElementObject* self;
307
308 self = PyObject_New(ElementObject, &Element_Type);
309 if (self == NULL)
310 return NULL;
311
312 /* use None for empty dictionaries */
313 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
314 attrib = Py_None;
315
316 self->extra = NULL;
317
318 if (attrib != Py_None) {
319
320 if (element_new_extra(self, attrib) < 0)
321 return NULL;
322
323 self->extra->length = 0;
324 self->extra->allocated = STATIC_CHILDREN;
325 self->extra->children = self->extra->_children;
326
327 }
328
329 Py_INCREF(tag);
330 self->tag = tag;
331
332 Py_INCREF(Py_None);
333 self->text = Py_None;
334
335 Py_INCREF(Py_None);
336 self->tail = Py_None;
337
338 ALLOC(sizeof(ElementObject), "create element");
339
340 return (PyObject*) self;
341}
342
343LOCAL(int)
344element_resize(ElementObject* self, int extra)
345{
346 int size;
347 PyObject* *children;
348
349 /* make sure self->children can hold the given number of extra
350 elements. set an exception and return -1 if allocation failed */
351
352 if (!self->extra)
353 element_new_extra(self, NULL);
354
355 size = self->extra->length + extra;
356
357 if (size > self->extra->allocated) {
358 /* use Python 2.4's list growth strategy */
359 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
360 if (self->extra->children != self->extra->_children) {
361 children = PyObject_Realloc(self->extra->children,
362 size * sizeof(PyObject*));
363 if (!children)
364 goto nomemory;
365 } else {
366 children = PyObject_Malloc(size * sizeof(PyObject*));
367 if (!children)
368 goto nomemory;
369 /* copy existing children from static area to malloc buffer */
370 memcpy(children, self->extra->children,
371 self->extra->length * sizeof(PyObject*));
372 }
373 self->extra->children = children;
374 self->extra->allocated = size;
375 }
376
377 return 0;
378
379 nomemory:
380 PyErr_NoMemory();
381 return -1;
382}
383
384LOCAL(int)
385element_add_subelement(ElementObject* self, PyObject* element)
386{
387 /* add a child element to a parent */
388
389 if (element_resize(self, 1) < 0)
390 return -1;
391
392 Py_INCREF(element);
393 self->extra->children[self->extra->length] = element;
394
395 self->extra->length++;
396
397 return 0;
398}
399
400LOCAL(PyObject*)
401element_get_attrib(ElementObject* self)
402{
403 /* return borrowed reference to attrib dictionary */
404 /* note: this function assumes that the extra section exists */
405
406 PyObject* res = self->extra->attrib;
407
408 if (res == Py_None) {
409 /* create missing dictionary */
410 res = PyDict_New();
411 if (!res)
412 return NULL;
413 self->extra->attrib = res;
414 }
415
416 return res;
417}
418
419LOCAL(PyObject*)
420element_get_text(ElementObject* self)
421{
422 /* return borrowed reference to text attribute */
423
424 PyObject* res = self->text;
425
426 if (JOIN_GET(res)) {
427 res = JOIN_OBJ(res);
428 if (PyList_CheckExact(res)) {
429 res = list_join(res);
430 if (!res)
431 return NULL;
432 self->text = res;
433 }
434 }
435
436 return res;
437}
438
439LOCAL(PyObject*)
440element_get_tail(ElementObject* self)
441{
442 /* return borrowed reference to text attribute */
443
444 PyObject* res = self->tail;
445
446 if (JOIN_GET(res)) {
447 res = JOIN_OBJ(res);
448 if (PyList_CheckExact(res)) {
449 res = list_join(res);
450 if (!res)
451 return NULL;
452 self->tail = res;
453 }
454 }
455
456 return res;
457}
458
459static PyObject*
460element(PyObject* self, PyObject* args, PyObject* kw)
461{
462 PyObject* elem;
463
464 PyObject* tag;
465 PyObject* attrib = NULL;
466 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
467 &PyDict_Type, &attrib))
468 return NULL;
469
470 if (attrib || kw) {
471 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
472 if (!attrib)
473 return NULL;
474 if (kw)
475 PyDict_Update(attrib, kw);
476 } else {
477 Py_INCREF(Py_None);
478 attrib = Py_None;
479 }
480
481 elem = element_new(tag, attrib);
482
483 Py_DECREF(attrib);
484
485 return elem;
486}
487
488static PyObject*
489subelement(PyObject* self, PyObject* args, PyObject* kw)
490{
491 PyObject* elem;
492
493 ElementObject* parent;
494 PyObject* tag;
495 PyObject* attrib = NULL;
496 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
497 &Element_Type, &parent, &tag,
498 &PyDict_Type, &attrib))
499 return NULL;
500
501 if (attrib || kw) {
502 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
503 if (!attrib)
504 return NULL;
505 if (kw)
506 PyDict_Update(attrib, kw);
507 } else {
508 Py_INCREF(Py_None);
509 attrib = Py_None;
510 }
511
512 elem = element_new(tag, attrib);
513
514 Py_DECREF(attrib);
515
516 if (element_add_subelement(parent, elem) < 0)
517 return NULL;
518
519 return elem;
520}
521
522static void
523element_dealloc(ElementObject* self)
524{
525 if (self->extra)
526 element_dealloc_extra(self);
527
528 /* discard attributes */
529 Py_DECREF(self->tag);
530 Py_DECREF(JOIN_OBJ(self->text));
531 Py_DECREF(JOIN_OBJ(self->tail));
532
533 RELEASE(sizeof(ElementObject), "destroy element");
534
535 PyObject_Del(self);
536}
537
538/* -------------------------------------------------------------------- */
539/* methods (in alphabetical order) */
540
541static PyObject*
542element_append(ElementObject* self, PyObject* args)
543{
544 PyObject* element;
545 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
546 return NULL;
547
548 if (element_add_subelement(self, element) < 0)
549 return NULL;
550
551 Py_RETURN_NONE;
552}
553
554static PyObject*
555element_clear(ElementObject* self, PyObject* args)
556{
557 if (!PyArg_ParseTuple(args, ":clear"))
558 return NULL;
559
560 if (self->extra) {
561 element_dealloc_extra(self);
562 self->extra = NULL;
563 }
564
565 Py_INCREF(Py_None);
566 Py_DECREF(JOIN_OBJ(self->text));
567 self->text = Py_None;
568
569 Py_INCREF(Py_None);
570 Py_DECREF(JOIN_OBJ(self->tail));
571 self->tail = Py_None;
572
573 Py_RETURN_NONE;
574}
575
576static PyObject*
577element_copy(ElementObject* self, PyObject* args)
578{
579 int i;
580 ElementObject* element;
581
582 if (!PyArg_ParseTuple(args, ":__copy__"))
583 return NULL;
584
585 element = (ElementObject*) element_new(
586 self->tag, (self->extra) ? self->extra->attrib : Py_None
587 );
588 if (!element)
589 return NULL;
590
591 Py_DECREF(JOIN_OBJ(element->text));
592 element->text = self->text;
593 Py_INCREF(JOIN_OBJ(element->text));
594
595 Py_DECREF(JOIN_OBJ(element->tail));
596 element->tail = self->tail;
597 Py_INCREF(JOIN_OBJ(element->tail));
598
599 if (self->extra) {
600
601 if (element_resize(element, self->extra->length) < 0)
602 return NULL;
603
604 for (i = 0; i < self->extra->length; i++) {
605 Py_INCREF(self->extra->children[i]);
606 element->extra->children[i] = self->extra->children[i];
607 }
608
609 element->extra->length = self->extra->length;
610
611 }
612
613 return (PyObject*) element;
614}
615
616static PyObject*
617element_deepcopy(ElementObject* self, PyObject* args)
618{
619 int i;
620 ElementObject* element;
621 PyObject* tag;
622 PyObject* attrib;
623 PyObject* text;
624 PyObject* tail;
625 PyObject* id;
626
627 PyObject* memo;
628 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
629 return NULL;
630
631 tag = deepcopy(self->tag, memo);
632 if (!tag)
633 return NULL;
634
635 if (self->extra) {
636 attrib = deepcopy(self->extra->attrib, memo);
637 if (!attrib) {
638 Py_DECREF(tag);
639 return NULL;
640 }
641 } else {
642 Py_INCREF(Py_None);
643 attrib = Py_None;
644 }
645
646 element = (ElementObject*) element_new(tag, attrib);
647
648 Py_DECREF(tag);
649 Py_DECREF(attrib);
650
651 if (!element)
652 return NULL;
653
654 text = deepcopy(JOIN_OBJ(self->text), memo);
655 if (!text)
656 goto error;
657 Py_DECREF(element->text);
658 element->text = JOIN_SET(text, JOIN_GET(self->text));
659
660 tail = deepcopy(JOIN_OBJ(self->tail), memo);
661 if (!tail)
662 goto error;
663 Py_DECREF(element->tail);
664 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
665
666 if (self->extra) {
667
668 if (element_resize(element, self->extra->length) < 0)
669 goto error;
670
671 for (i = 0; i < self->extra->length; i++) {
672 PyObject* child = deepcopy(self->extra->children[i], memo);
673 if (!child) {
674 element->extra->length = i;
675 goto error;
676 }
677 element->extra->children[i] = child;
678 }
679
680 element->extra->length = self->extra->length;
681
682 }
683
684 /* add object to memo dictionary (so deepcopy won't visit it again) */
685 id = PyInt_FromLong((Py_uintptr_t) self);
686
687 i = PyDict_SetItem(memo, id, (PyObject*) element);
688
689 Py_DECREF(id);
690
691 if (i < 0)
692 goto error;
693
694 return (PyObject*) element;
695
696 error:
697 Py_DECREF(element);
698 return NULL;
699}
700
701LOCAL(int)
702checkpath(PyObject* tag)
703{
704 int i, check = 1;
705
706 /* check if a tag contains an xpath character */
707
708#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
709
710#if defined(Py_USING_UNICODE)
711 if (PyUnicode_Check(tag)) {
712 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
713 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
714 if (p[i] == '{')
715 check = 0;
716 else if (p[i] == '}')
717 check = 1;
718 else if (check && PATHCHAR(p[i]))
719 return 1;
720 }
721 return 0;
722 }
723#endif
724 if (PyString_Check(tag)) {
725 char *p = PyString_AS_STRING(tag);
726 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
727 if (p[i] == '{')
728 check = 0;
729 else if (p[i] == '}')
730 check = 1;
731 else if (check && PATHCHAR(p[i]))
732 return 1;
733 }
734 return 0;
735 }
736
737 return 1; /* unknown type; might be path expression */
738}
739
740static PyObject*
741element_find(ElementObject* self, PyObject* args)
742{
743 int i;
744
745 PyObject* tag;
746 if (!PyArg_ParseTuple(args, "O:find", &tag))
747 return NULL;
748
749 if (checkpath(tag))
750 return PyObject_CallMethod(
751 elementpath_obj, "find", "OO", self, tag
752 );
753
754 if (!self->extra)
755 Py_RETURN_NONE;
756
757 for (i = 0; i < self->extra->length; i++) {
758 PyObject* item = self->extra->children[i];
759 if (Element_CheckExact(item) &&
760 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
761 Py_INCREF(item);
762 return item;
763 }
764 }
765
766 Py_RETURN_NONE;
767}
768
769static PyObject*
770element_findtext(ElementObject* self, PyObject* args)
771{
772 int i;
773
774 PyObject* tag;
775 PyObject* default_value = Py_None;
776 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
777 return NULL;
778
779 if (checkpath(tag))
780 return PyObject_CallMethod(
781 elementpath_obj, "findtext", "OOO", self, tag, default_value
782 );
783
784 if (!self->extra) {
785 Py_INCREF(default_value);
786 return default_value;
787 }
788
789 for (i = 0; i < self->extra->length; i++) {
790 ElementObject* item = (ElementObject*) self->extra->children[i];
791 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
792 PyObject* text = element_get_text(item);
793 if (text == Py_None)
794 return PyString_FromString("");
795 Py_INCREF(text);
796 return text;
797 }
798 }
799
800 Py_INCREF(default_value);
801 return default_value;
802}
803
804static PyObject*
805element_findall(ElementObject* self, PyObject* args)
806{
807 int i;
808 PyObject* out;
809
810 PyObject* tag;
811 if (!PyArg_ParseTuple(args, "O:findall", &tag))
812 return NULL;
813
814 if (checkpath(tag))
815 return PyObject_CallMethod(
816 elementpath_obj, "findall", "OO", self, tag
817 );
818
819 out = PyList_New(0);
820 if (!out)
821 return NULL;
822
823 if (!self->extra)
824 return out;
825
826 for (i = 0; i < self->extra->length; i++) {
827 PyObject* item = self->extra->children[i];
828 if (Element_CheckExact(item) &&
829 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
830 if (PyList_Append(out, item) < 0) {
831 Py_DECREF(out);
832 return NULL;
833 }
834 }
835 }
836
837 return out;
838}
839
840static PyObject*
841element_get(ElementObject* self, PyObject* args)
842{
843 PyObject* value;
844
845 PyObject* key;
846 PyObject* default_value = Py_None;
847 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
848 return NULL;
849
850 if (!self->extra || self->extra->attrib == Py_None)
851 value = default_value;
852 else {
853 value = PyDict_GetItem(self->extra->attrib, key);
854 if (!value)
855 value = default_value;
856 }
857
858 Py_INCREF(value);
859 return value;
860}
861
862static PyObject*
863element_getchildren(ElementObject* self, PyObject* args)
864{
865 int i;
866 PyObject* list;
867
868 if (!PyArg_ParseTuple(args, ":getchildren"))
869 return NULL;
870
871 if (!self->extra)
872 return PyList_New(0);
873
874 list = PyList_New(self->extra->length);
875 if (!list)
876 return NULL;
877
878 for (i = 0; i < self->extra->length; i++) {
879 PyObject* item = self->extra->children[i];
880 Py_INCREF(item);
881 PyList_SET_ITEM(list, i, item);
882 }
883
884 return list;
885}
886
887static PyObject*
888element_getiterator(ElementObject* self, PyObject* args)
889{
890 PyObject* result;
891
892 PyObject* tag = Py_None;
893 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
894 return NULL;
895
896 if (!elementtree_getiterator_obj) {
897 PyErr_SetString(
898 PyExc_RuntimeError,
899 "getiterator helper not found"
900 );
901 return NULL;
902 }
903
904 args = PyTuple_New(2);
Neal Norwitz02876df2006-02-07 06:58:52 +0000905 if (args == NULL)
906 return NULL;
907
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000908 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
909 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
910
911 result = PyObject_CallObject(elementtree_getiterator_obj, args);
912
913 Py_DECREF(args);
914
915 return result;
916}
917
918static PyObject*
Martin v. Löwis18e16552006-02-15 17:27:45 +0000919element_getitem(PyObject* _self, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000920{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000921 ElementObject* self = (ElementObject*)_self;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000922 if (!self->extra || index < 0 || index >= self->extra->length) {
923 PyErr_SetString(
924 PyExc_IndexError,
925 "child index out of range"
926 );
927 return NULL;
928 }
929
930 Py_INCREF(self->extra->children[index]);
931 return self->extra->children[index];
932}
933
934static PyObject*
Martin v. Löwis18e16552006-02-15 17:27:45 +0000935element_getslice(PyObject* _self, Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000936{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000937 ElementObject* self = (ElementObject*)_self;
938 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000939 PyObject* list;
940
941 if (!self->extra)
942 return PyList_New(0);
943
944 /* standard clamping */
945 if (start < 0)
946 start = 0;
947 if (end < 0)
948 end = 0;
949 if (end > self->extra->length)
950 end = self->extra->length;
951 if (start > end)
952 start = end;
953
954 list = PyList_New(end - start);
955 if (!list)
956 return NULL;
957
958 for (i = start; i < end; i++) {
959 PyObject* item = self->extra->children[i];
960 Py_INCREF(item);
961 PyList_SET_ITEM(list, i - start, item);
962 }
963
964 return list;
965}
966
967static PyObject*
968element_insert(ElementObject* self, PyObject* args)
969{
970 int i;
971
972 int index;
973 PyObject* element;
974 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
975 &Element_Type, &element))
976 return NULL;
977
978 if (!self->extra)
979 element_new_extra(self, NULL);
980
981 if (index < 0)
982 index = 0;
983 if (index > self->extra->length)
984 index = self->extra->length;
985
986 if (element_resize(self, 1) < 0)
987 return NULL;
988
989 for (i = self->extra->length; i > index; i--)
990 self->extra->children[i] = self->extra->children[i-1];
991
992 Py_INCREF(element);
993 self->extra->children[index] = element;
994
995 self->extra->length++;
996
997 Py_RETURN_NONE;
998}
999
1000static PyObject*
1001element_items(ElementObject* self, PyObject* args)
1002{
1003 if (!PyArg_ParseTuple(args, ":items"))
1004 return NULL;
1005
1006 if (!self->extra || self->extra->attrib == Py_None)
1007 return PyList_New(0);
1008
1009 return PyDict_Items(self->extra->attrib);
1010}
1011
1012static PyObject*
1013element_keys(ElementObject* self, PyObject* args)
1014{
1015 if (!PyArg_ParseTuple(args, ":keys"))
1016 return NULL;
1017
1018 if (!self->extra || self->extra->attrib == Py_None)
1019 return PyList_New(0);
1020
1021 return PyDict_Keys(self->extra->attrib);
1022}
1023
Martin v. Löwis18e16552006-02-15 17:27:45 +00001024static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001025element_length(ElementObject* self)
1026{
1027 if (!self->extra)
1028 return 0;
1029
1030 return self->extra->length;
1031}
1032
1033static PyObject*
1034element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1035{
1036 PyObject* elem;
1037
1038 PyObject* tag;
1039 PyObject* attrib;
1040 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1041 return NULL;
1042
1043 attrib = PyDict_Copy(attrib);
1044 if (!attrib)
1045 return NULL;
1046
1047 elem = element_new(tag, attrib);
1048
1049 Py_DECREF(attrib);
1050
1051 return elem;
1052}
1053
1054static PyObject*
1055element_reduce(ElementObject* self, PyObject* args)
1056{
1057 if (!PyArg_ParseTuple(args, ":__reduce__"))
1058 return NULL;
1059
1060 /* Hack alert: This method is used to work around a __copy__
1061 problem on certain 2.3 and 2.4 versions. To save time and
1062 simplify the code, we create the copy in here, and use a dummy
1063 copyelement helper to trick the copy module into doing the
1064 right thing. */
1065
1066 if (!elementtree_copyelement_obj) {
1067 PyErr_SetString(
1068 PyExc_RuntimeError,
1069 "copyelement helper not found"
1070 );
1071 return NULL;
1072 }
1073
1074 return Py_BuildValue(
1075 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1076 );
1077}
1078
1079static PyObject*
1080element_remove(ElementObject* self, PyObject* args)
1081{
1082 int i;
1083
1084 PyObject* element;
1085 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1086 return NULL;
1087
1088 if (!self->extra) {
1089 /* element has no children, so raise exception */
1090 PyErr_SetString(
1091 PyExc_ValueError,
1092 "list.remove(x): x not in list"
1093 );
1094 return NULL;
1095 }
1096
1097 for (i = 0; i < self->extra->length; i++) {
1098 if (self->extra->children[i] == element)
1099 break;
1100 if (PyObject_Compare(self->extra->children[i], element) == 0)
1101 break;
1102 }
1103
1104 if (i == self->extra->length) {
1105 /* element is not in children, so raise exception */
1106 PyErr_SetString(
1107 PyExc_ValueError,
1108 "list.remove(x): x not in list"
1109 );
1110 return NULL;
1111 }
1112
1113 Py_DECREF(self->extra->children[i]);
1114
1115 self->extra->length--;
1116
1117 for (; i < self->extra->length; i++)
1118 self->extra->children[i] = self->extra->children[i+1];
1119
1120 Py_RETURN_NONE;
1121}
1122
1123static PyObject*
1124element_repr(ElementObject* self)
1125{
1126 PyObject* repr;
1127 char buffer[100];
1128
1129 repr = PyString_FromString("<Element ");
1130
1131 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag));
1132
1133 sprintf(buffer, " at %p>", self);
1134 PyString_ConcatAndDel(&repr, PyString_FromString(buffer));
1135
1136 return repr;
1137}
1138
1139static PyObject*
1140element_set(ElementObject* self, PyObject* args)
1141{
1142 PyObject* attrib;
1143
1144 PyObject* key;
1145 PyObject* value;
1146 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1147 return NULL;
1148
1149 if (!self->extra)
1150 element_new_extra(self, NULL);
1151
1152 attrib = element_get_attrib(self);
1153 if (!attrib)
1154 return NULL;
1155
1156 if (PyDict_SetItem(attrib, key, value) < 0)
1157 return NULL;
1158
1159 Py_RETURN_NONE;
1160}
1161
1162static int
Martin v. Löwis18e16552006-02-15 17:27:45 +00001163element_setslice(PyObject* _self, Py_ssize_t start, Py_ssize_t end, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001164{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001165 ElementObject* self = (ElementObject*)_self;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001166 int i, new, old;
1167 PyObject* recycle = NULL;
1168
1169 if (!self->extra)
1170 element_new_extra(self, NULL);
1171
1172 /* standard clamping */
1173 if (start < 0)
1174 start = 0;
1175 if (end < 0)
1176 end = 0;
1177 if (end > self->extra->length)
1178 end = self->extra->length;
1179 if (start > end)
1180 start = end;
1181
1182 old = end - start;
1183
1184 if (item == NULL)
1185 new = 0;
1186 else if (PyList_CheckExact(item)) {
1187 new = PyList_GET_SIZE(item);
1188 } else {
1189 /* FIXME: support arbitrary sequences? */
1190 PyErr_Format(
1191 PyExc_TypeError,
1192 "expected list, not \"%.200s\"", item->ob_type->tp_name
1193 );
1194 return -1;
1195 }
1196
1197 if (old > 0) {
1198 /* to avoid recursive calls to this method (via decref), move
1199 old items to the recycle bin here, and get rid of them when
1200 we're done modifying the element */
1201 recycle = PyList_New(old);
1202 for (i = 0; i < old; i++)
1203 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1204 }
1205
1206 if (new < old) {
1207 /* delete slice */
1208 for (i = end; i < self->extra->length; i++)
1209 self->extra->children[i + new - old] = self->extra->children[i];
1210 } else if (new > old) {
1211 /* insert slice */
1212 if (element_resize(self, new - old) < 0)
1213 return -1;
1214 for (i = self->extra->length-1; i >= end; i--)
1215 self->extra->children[i + new - old] = self->extra->children[i];
1216 }
1217
1218 /* replace the slice */
1219 for (i = 0; i < new; i++) {
1220 PyObject* element = PyList_GET_ITEM(item, i);
1221 Py_INCREF(element);
1222 self->extra->children[i + start] = element;
1223 }
1224
1225 self->extra->length += new - old;
1226
1227 /* discard the recycle bin, and everything in it */
1228 Py_XDECREF(recycle);
1229
1230 return 0;
1231}
1232
1233static int
Martin v. Löwis18e16552006-02-15 17:27:45 +00001234element_setitem(PyObject* _self, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001235{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001236 ElementObject* self = (ElementObject*)_self;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001237 int i;
1238 PyObject* old;
1239
1240 if (!self->extra || index < 0 || index >= self->extra->length) {
1241 PyErr_SetString(
1242 PyExc_IndexError,
1243 "child assignment index out of range");
1244 return -1;
1245 }
1246
1247 old = self->extra->children[index];
1248
1249 if (item) {
1250 Py_INCREF(item);
1251 self->extra->children[index] = item;
1252 } else {
1253 self->extra->length--;
1254 for (i = index; i < self->extra->length; i++)
1255 self->extra->children[i] = self->extra->children[i+1];
1256 }
1257
1258 Py_DECREF(old);
1259
1260 return 0;
1261}
1262
1263static PyMethodDef element_methods[] = {
1264
1265 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1266
1267 {"get", (PyCFunction) element_get, METH_VARARGS},
1268 {"set", (PyCFunction) element_set, METH_VARARGS},
1269
1270 {"find", (PyCFunction) element_find, METH_VARARGS},
1271 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1272 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1273
1274 {"append", (PyCFunction) element_append, METH_VARARGS},
1275 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1276 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1277
1278 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1279 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1280
1281 {"items", (PyCFunction) element_items, METH_VARARGS},
1282 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1283
1284 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1285
1286 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1287 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1288
1289 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1290 C objects correctly, so we have to fake it using a __reduce__-
1291 based hack (see the element_reduce implementation above for
1292 details). */
1293
1294 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1295 using a runtime test to figure out if we need to fake things
1296 or now (see the init code below). The following entry is
1297 enabled only if the hack is needed. */
1298
1299 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1300
1301 {NULL, NULL}
1302};
1303
1304static PyObject*
1305element_getattr(ElementObject* self, char* name)
1306{
1307 PyObject* res;
1308
1309 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1310 if (res)
1311 return res;
1312
1313 PyErr_Clear();
1314
1315 if (strcmp(name, "tag") == 0)
1316 res = self->tag;
1317 else if (strcmp(name, "text") == 0)
1318 res = element_get_text(self);
1319 else if (strcmp(name, "tail") == 0) {
1320 res = element_get_tail(self);
1321 } else if (strcmp(name, "attrib") == 0) {
1322 if (!self->extra)
1323 element_new_extra(self, NULL);
1324 res = element_get_attrib(self);
1325 } else {
1326 PyErr_SetString(PyExc_AttributeError, name);
1327 return NULL;
1328 }
1329
1330 if (!res)
1331 return NULL;
1332
1333 Py_INCREF(res);
1334 return res;
1335}
1336
1337static int
1338element_setattr(ElementObject* self, const char* name, PyObject* value)
1339{
1340 if (value == NULL) {
1341 PyErr_SetString(
1342 PyExc_AttributeError,
1343 "can't delete element attributes"
1344 );
1345 return -1;
1346 }
1347
1348 if (strcmp(name, "tag") == 0) {
1349 Py_DECREF(self->tag);
1350 self->tag = value;
1351 Py_INCREF(self->tag);
1352 } else if (strcmp(name, "text") == 0) {
1353 Py_DECREF(JOIN_OBJ(self->text));
1354 self->text = value;
1355 Py_INCREF(self->text);
1356 } else if (strcmp(name, "tail") == 0) {
1357 Py_DECREF(JOIN_OBJ(self->tail));
1358 self->tail = value;
1359 Py_INCREF(self->tail);
1360 } else if (strcmp(name, "attrib") == 0) {
1361 if (!self->extra)
1362 element_new_extra(self, NULL);
1363 Py_DECREF(self->extra->attrib);
1364 self->extra->attrib = value;
1365 Py_INCREF(self->extra->attrib);
1366 } else {
1367 PyErr_SetString(PyExc_AttributeError, name);
1368 return -1;
1369 }
1370
1371 return 0;
1372}
1373
1374static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001375 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001376 0, /* sq_concat */
1377 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001378 element_getitem,
1379 element_getslice,
1380 element_setitem,
1381 element_setslice,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001382};
1383
1384statichere PyTypeObject Element_Type = {
1385 PyObject_HEAD_INIT(NULL)
1386 0, "Element", sizeof(ElementObject), 0,
1387 /* methods */
1388 (destructor)element_dealloc, /* tp_dealloc */
1389 0, /* tp_print */
1390 (getattrfunc)element_getattr, /* tp_getattr */
1391 (setattrfunc)element_setattr, /* tp_setattr */
1392 0, /* tp_compare */
1393 (reprfunc)element_repr, /* tp_repr */
1394 0, /* tp_as_number */
1395 &element_as_sequence, /* tp_as_sequence */
1396};
1397
1398/* ==================================================================== */
1399/* the tree builder type */
1400
1401typedef struct {
1402 PyObject_HEAD
1403
1404 PyObject* root; /* root node (first created node) */
1405
1406 ElementObject* this; /* current node */
1407 ElementObject* last; /* most recently created node */
1408
1409 PyObject* data; /* data collector (string or list), or NULL */
1410
1411 PyObject* stack; /* element stack */
1412 int index; /* current stack size (0=empty) */
1413
1414 /* element tracing */
1415 PyObject* events; /* list of events, or NULL if not collecting */
1416 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1417 PyObject* end_event_obj;
1418 PyObject* start_ns_event_obj;
1419 PyObject* end_ns_event_obj;
1420
1421} TreeBuilderObject;
1422
1423staticforward PyTypeObject TreeBuilder_Type;
1424
1425#define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type)
1426
1427/* -------------------------------------------------------------------- */
1428/* constructor and destructor */
1429
1430LOCAL(PyObject*)
1431treebuilder_new(void)
1432{
1433 TreeBuilderObject* self;
1434
1435 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1436 if (self == NULL)
1437 return NULL;
1438
1439 self->root = NULL;
1440
1441 Py_INCREF(Py_None);
1442 self->this = (ElementObject*) Py_None;
1443
1444 Py_INCREF(Py_None);
1445 self->last = (ElementObject*) Py_None;
1446
1447 self->data = NULL;
1448
1449 self->stack = PyList_New(20);
1450 self->index = 0;
1451
1452 self->events = NULL;
1453 self->start_event_obj = self->end_event_obj = NULL;
1454 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1455
1456 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1457
1458 return (PyObject*) self;
1459}
1460
1461static PyObject*
1462treebuilder(PyObject* _self, PyObject* args)
1463{
1464 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1465 return NULL;
1466
1467 return treebuilder_new();
1468}
1469
1470static void
1471treebuilder_dealloc(TreeBuilderObject* self)
1472{
1473 Py_XDECREF(self->end_ns_event_obj);
1474 Py_XDECREF(self->start_ns_event_obj);
1475 Py_XDECREF(self->end_event_obj);
1476 Py_XDECREF(self->start_event_obj);
1477 Py_XDECREF(self->events);
1478 Py_DECREF(self->stack);
1479 Py_XDECREF(self->data);
1480 Py_DECREF(self->last);
1481 Py_DECREF(self->this);
1482 Py_XDECREF(self->root);
1483
1484 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1485
1486 PyObject_Del(self);
1487}
1488
1489/* -------------------------------------------------------------------- */
1490/* handlers */
1491
1492LOCAL(PyObject*)
1493treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1494 PyObject* standalone)
1495{
1496 Py_RETURN_NONE;
1497}
1498
1499LOCAL(PyObject*)
1500treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1501 PyObject* attrib)
1502{
1503 PyObject* node;
1504 PyObject* this;
1505
1506 if (self->data) {
1507 if (self->this == self->last) {
Fredrik Lundh0149e3a2005-12-18 13:58:25 +00001508 Py_DECREF(self->last->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001509 self->last->text = JOIN_SET(
1510 self->data, PyList_CheckExact(self->data)
1511 );
1512 } else {
Fredrik Lundh0149e3a2005-12-18 13:58:25 +00001513 Py_DECREF(self->last->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001514 self->last->tail = JOIN_SET(
1515 self->data, PyList_CheckExact(self->data)
1516 );
1517 }
1518 self->data = NULL;
1519 }
1520
1521 node = element_new(tag, attrib);
1522 if (!node)
1523 return NULL;
1524
1525 this = (PyObject*) self->this;
1526
1527 if (this != Py_None) {
1528 if (element_add_subelement((ElementObject*) this, node) < 0)
1529 return NULL;
1530 } else {
1531 if (self->root) {
1532 PyErr_SetString(
1533 PyExc_SyntaxError,
1534 "multiple elements on top level"
1535 );
1536 return NULL;
1537 }
1538 Py_INCREF(node);
1539 self->root = node;
1540 }
1541
1542 if (self->index < PyList_GET_SIZE(self->stack)) {
1543 if (PyList_SetItem(self->stack, self->index, this) < 0)
1544 return NULL;
1545 Py_INCREF(this);
1546 } else {
1547 if (PyList_Append(self->stack, this) < 0)
1548 return NULL;
1549 }
1550 self->index++;
1551
1552 Py_DECREF(this);
1553 Py_INCREF(node);
1554 self->this = (ElementObject*) node;
1555
1556 Py_DECREF(self->last);
1557 Py_INCREF(node);
1558 self->last = (ElementObject*) node;
1559
1560 if (self->start_event_obj) {
1561 PyObject* res;
1562 PyObject* action = self->start_event_obj;
1563 res = PyTuple_New(2);
1564 if (res) {
1565 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1566 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1567 PyList_Append(self->events, res);
1568 Py_DECREF(res);
1569 } else
1570 PyErr_Clear(); /* FIXME: propagate error */
1571 }
1572
1573 return node;
1574}
1575
1576LOCAL(PyObject*)
1577treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1578{
1579 if (!self->data) {
1580 /* store the first item as is */
1581 Py_INCREF(data); self->data = data;
1582 } else {
1583 /* more than one item; use a list to collect items */
1584 if (PyString_CheckExact(self->data) && self->data->ob_refcnt == 1 &&
1585 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1586 /* expat often generates single character data sections; handle
1587 the most common case by resizing the existing string... */
1588 int size = PyString_GET_SIZE(self->data);
1589 if (_PyString_Resize(&self->data, size + 1) < 0)
1590 return NULL;
1591 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1592 } else if (PyList_CheckExact(self->data)) {
1593 if (PyList_Append(self->data, data) < 0)
1594 return NULL;
1595 } else {
1596 PyObject* list = PyList_New(2);
1597 if (!list)
1598 return NULL;
1599 PyList_SET_ITEM(list, 0, self->data);
1600 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1601 self->data = list;
1602 }
1603 }
1604
1605 Py_RETURN_NONE;
1606}
1607
1608LOCAL(PyObject*)
1609treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1610{
1611 PyObject* item;
1612
1613 if (self->data) {
1614 if (self->this == self->last) {
Fredrik Lundh0149e3a2005-12-18 13:58:25 +00001615 Py_DECREF(self->last->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001616 self->last->text = JOIN_SET(
1617 self->data, PyList_CheckExact(self->data)
1618 );
1619 } else {
Fredrik Lundh0149e3a2005-12-18 13:58:25 +00001620 Py_DECREF(self->last->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001621 self->last->tail = JOIN_SET(
1622 self->data, PyList_CheckExact(self->data)
1623 );
1624 }
1625 self->data = NULL;
1626 }
1627
1628 if (self->index == 0) {
1629 PyErr_SetString(
1630 PyExc_IndexError,
1631 "pop from empty stack"
1632 );
1633 return NULL;
1634 }
1635
1636 self->index--;
1637
1638 item = PyList_GET_ITEM(self->stack, self->index);
1639 Py_INCREF(item);
1640
1641 Py_DECREF(self->last);
1642
1643 self->last = (ElementObject*) self->this;
1644 self->this = (ElementObject*) item;
1645
1646 if (self->end_event_obj) {
1647 PyObject* res;
1648 PyObject* action = self->end_event_obj;
1649 PyObject* node = (PyObject*) self->last;
1650 res = PyTuple_New(2);
1651 if (res) {
1652 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1653 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1654 PyList_Append(self->events, res);
1655 Py_DECREF(res);
1656 } else
1657 PyErr_Clear(); /* FIXME: propagate error */
1658 }
1659
1660 Py_INCREF(self->last);
1661 return (PyObject*) self->last;
1662}
1663
1664LOCAL(void)
1665treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1666 const char* prefix, const char *uri)
1667{
1668 PyObject* res;
1669 PyObject* action;
1670 PyObject* parcel;
1671
1672 if (!self->events)
1673 return;
1674
1675 if (start) {
1676 if (!self->start_ns_event_obj)
1677 return;
1678 action = self->start_ns_event_obj;
1679 /* FIXME: prefix and uri use utf-8 encoding! */
1680 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1681 if (!parcel)
1682 return;
1683 Py_INCREF(action);
1684 } else {
1685 if (!self->end_ns_event_obj)
1686 return;
1687 action = self->end_ns_event_obj;
1688 Py_INCREF(action);
1689 parcel = Py_None;
1690 Py_INCREF(parcel);
1691 }
1692
1693 res = PyTuple_New(2);
1694
1695 if (res) {
1696 PyTuple_SET_ITEM(res, 0, action);
1697 PyTuple_SET_ITEM(res, 1, parcel);
1698 PyList_Append(self->events, res);
1699 Py_DECREF(res);
1700 } else
1701 PyErr_Clear(); /* FIXME: propagate error */
1702}
1703
1704/* -------------------------------------------------------------------- */
1705/* methods (in alphabetical order) */
1706
1707static PyObject*
1708treebuilder_data(TreeBuilderObject* self, PyObject* args)
1709{
1710 PyObject* data;
1711 if (!PyArg_ParseTuple(args, "O:data", &data))
1712 return NULL;
1713
1714 return treebuilder_handle_data(self, data);
1715}
1716
1717static PyObject*
1718treebuilder_end(TreeBuilderObject* self, PyObject* args)
1719{
1720 PyObject* tag;
1721 if (!PyArg_ParseTuple(args, "O:end", &tag))
1722 return NULL;
1723
1724 return treebuilder_handle_end(self, tag);
1725}
1726
1727LOCAL(PyObject*)
1728treebuilder_done(TreeBuilderObject* self)
1729{
1730 PyObject* res;
1731
1732 /* FIXME: check stack size? */
1733
1734 if (self->root)
1735 res = self->root;
1736 else
1737 res = Py_None;
1738
1739 Py_INCREF(res);
1740 return res;
1741}
1742
1743static PyObject*
1744treebuilder_close(TreeBuilderObject* self, PyObject* args)
1745{
1746 if (!PyArg_ParseTuple(args, ":close"))
1747 return NULL;
1748
1749 return treebuilder_done(self);
1750}
1751
1752static PyObject*
1753treebuilder_start(TreeBuilderObject* self, PyObject* args)
1754{
1755 PyObject* tag;
1756 PyObject* attrib = Py_None;
1757 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1758 return NULL;
1759
1760 return treebuilder_handle_start(self, tag, attrib);
1761}
1762
1763static PyObject*
1764treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1765{
1766 PyObject* encoding;
1767 PyObject* standalone;
1768 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1769 return NULL;
1770
1771 return treebuilder_handle_xml(self, encoding, standalone);
1772}
1773
1774static PyMethodDef treebuilder_methods[] = {
1775 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1776 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1777 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1778 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1779 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1780 {NULL, NULL}
1781};
1782
1783static PyObject*
1784treebuilder_getattr(TreeBuilderObject* self, char* name)
1785{
1786 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1787}
1788
1789statichere PyTypeObject TreeBuilder_Type = {
1790 PyObject_HEAD_INIT(NULL)
1791 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
1792 /* methods */
1793 (destructor)treebuilder_dealloc, /* tp_dealloc */
1794 0, /* tp_print */
1795 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1796};
1797
1798/* ==================================================================== */
1799/* the expat interface */
1800
1801#if defined(USE_EXPAT)
1802
1803#include "expat.h"
1804
/* EXPAT(func) names an expat entry point: through the pyexpat dispatch
   table when USE_PYEXPAT_CAPI is defined, otherwise the XML_-prefixed
   library function directly. */
#ifdef USE_PYEXPAT_CAPI
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
1812
1813typedef struct {
1814 PyObject_HEAD
1815
1816 XML_Parser parser;
1817
1818 PyObject* target;
1819 PyObject* entity;
1820
1821 PyObject* names;
1822
1823 PyObject* handle_xml;
1824 PyObject* handle_start;
1825 PyObject* handle_data;
1826 PyObject* handle_end;
1827
1828 PyObject* handle_comment;
1829 PyObject* handle_pi;
1830
1831} XMLParserObject;
1832
1833staticforward PyTypeObject XMLParser_Type;
1834
1835/* helpers */
1836
#if defined(Py_USING_UNICODE)
/* Return 1 if any of the first size bytes has its high bit set (i.e. the
   8-bit string contains UTF-8 encoded characters), 0 otherwise. */
LOCAL(int)
checkstring(const char* string, int size)
{
    int pos = 0;

    while (pos < size) {
        if (string[pos] & 0x80)
            return 1;
        pos++;
    }

    return 0;
}
#endif
1851
1852LOCAL(PyObject*)
1853makestring(const char* string, int size)
1854{
1855 /* convert a UTF-8 string to either a 7-bit ascii string or a
1856 Unicode string */
1857
1858#if defined(Py_USING_UNICODE)
1859 if (checkstring(string, size))
1860 return PyUnicode_DecodeUTF8(string, size, "strict");
1861#endif
1862
1863 return PyString_FromStringAndSize(string, size);
1864}
1865
1866LOCAL(PyObject*)
1867makeuniversal(XMLParserObject* self, const char* string)
1868{
1869 /* convert a UTF-8 tag/attribute name from the expat parser
1870 to a universal name string */
1871
1872 int size = strlen(string);
1873 PyObject* key;
1874 PyObject* value;
1875
1876 /* look the 'raw' name up in the names dictionary */
1877 key = PyString_FromStringAndSize(string, size);
1878 if (!key)
1879 return NULL;
1880
1881 value = PyDict_GetItem(self->names, key);
1882
1883 if (value) {
1884 Py_INCREF(value);
1885 } else {
1886 /* new name. convert to universal name, and decode as
1887 necessary */
1888
1889 PyObject* tag;
1890 char* p;
1891 int i;
1892
1893 /* look for namespace separator */
1894 for (i = 0; i < size; i++)
1895 if (string[i] == '}')
1896 break;
1897 if (i != size) {
1898 /* convert to universal name */
1899 tag = PyString_FromStringAndSize(NULL, size+1);
1900 p = PyString_AS_STRING(tag);
1901 p[0] = '{';
1902 memcpy(p+1, string, size);
1903 size++;
1904 } else {
1905 /* plain name; use key as tag */
1906 Py_INCREF(key);
1907 tag = key;
1908 }
1909
1910 /* decode universal name */
1911#if defined(Py_USING_UNICODE)
1912 /* inline makestring, to avoid duplicating the source string if
1913 it's not an utf-8 string */
1914 p = PyString_AS_STRING(tag);
1915 if (checkstring(p, size)) {
1916 value = PyUnicode_DecodeUTF8(p, size, "strict");
1917 Py_DECREF(tag);
1918 if (!value) {
1919 Py_DECREF(key);
1920 return NULL;
1921 }
1922 } else
1923#endif
1924 value = tag; /* use tag as is */
1925
1926 /* add to names dictionary */
1927 if (PyDict_SetItem(self->names, key, value) < 0) {
1928 Py_DECREF(key);
1929 Py_DECREF(value);
1930 return NULL;
1931 }
1932 }
1933
1934 Py_DECREF(key);
1935 return value;
1936}
1937
1938/* -------------------------------------------------------------------- */
1939/* handlers */
1940
1941static void
1942expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1943 int data_len)
1944{
1945 PyObject* key;
1946 PyObject* value;
1947 PyObject* res;
1948
1949 if (data_len < 2 || data_in[0] != '&')
1950 return;
1951
1952 key = makestring(data_in + 1, data_len - 2);
1953 if (!key)
1954 return;
1955
1956 value = PyDict_GetItem(self->entity, key);
1957
1958 if (value) {
1959 if (TreeBuilder_CheckExact(self->target))
1960 res = treebuilder_handle_data(
1961 (TreeBuilderObject*) self->target, value
1962 );
1963 else if (self->handle_data)
1964 res = PyObject_CallFunction(self->handle_data, "O", value);
1965 else
1966 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001967 Py_XDECREF(res);
1968 } else {
1969 PyErr_Format(
1970 PyExc_SyntaxError, "undefined entity &%s;: line %d, column %d",
1971 PyString_AS_STRING(key),
1972 EXPAT(GetErrorLineNumber)(self->parser),
1973 EXPAT(GetErrorColumnNumber)(self->parser)
1974 );
1975 }
1976
1977 Py_DECREF(key);
1978}
1979
1980static void
1981expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
1982 const XML_Char **attrib_in)
1983{
1984 PyObject* res;
1985 PyObject* tag;
1986 PyObject* attrib;
1987 int ok;
1988
1989 /* tag name */
1990 tag = makeuniversal(self, tag_in);
1991 if (!tag)
1992 return; /* parser will look for errors */
1993
1994 /* attributes */
1995 if (attrib_in[0]) {
1996 attrib = PyDict_New();
1997 if (!attrib)
1998 return;
1999 while (attrib_in[0] && attrib_in[1]) {
2000 PyObject* key = makeuniversal(self, attrib_in[0]);
2001 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2002 if (!key || !value) {
2003 Py_XDECREF(value);
2004 Py_XDECREF(key);
2005 Py_DECREF(attrib);
2006 return;
2007 }
2008 ok = PyDict_SetItem(attrib, key, value);
2009 Py_DECREF(value);
2010 Py_DECREF(key);
2011 if (ok < 0) {
2012 Py_DECREF(attrib);
2013 return;
2014 }
2015 attrib_in += 2;
2016 }
2017 } else {
2018 Py_INCREF(Py_None);
2019 attrib = Py_None;
2020 }
2021
2022 if (TreeBuilder_CheckExact(self->target))
2023 /* shortcut */
2024 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2025 tag, attrib);
2026 else if (self->handle_start)
2027 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2028 else
2029 res = NULL;
2030
2031 Py_DECREF(tag);
2032 Py_DECREF(attrib);
2033
2034 Py_XDECREF(res);
2035}
2036
2037static void
2038expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2039 int data_len)
2040{
2041 PyObject* data;
2042 PyObject* res;
2043
2044 data = makestring(data_in, data_len);
2045
2046 if (TreeBuilder_CheckExact(self->target))
2047 /* shortcut */
2048 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2049 else if (self->handle_data)
2050 res = PyObject_CallFunction(self->handle_data, "O", data);
2051 else
2052 res = NULL;
2053
2054 Py_DECREF(data);
2055
2056 Py_XDECREF(res);
2057}
2058
2059static void
2060expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2061{
2062 PyObject* tag;
2063 PyObject* res = NULL;
2064
2065 if (TreeBuilder_CheckExact(self->target))
2066 /* shortcut */
2067 /* the standard tree builder doesn't look at the end tag */
2068 res = treebuilder_handle_end(
2069 (TreeBuilderObject*) self->target, Py_None
2070 );
2071 else if (self->handle_end) {
2072 tag = makeuniversal(self, tag_in);
2073 if (tag) {
2074 res = PyObject_CallFunction(self->handle_end, "O", tag);
2075 Py_DECREF(tag);
2076 }
2077 }
2078
2079 Py_XDECREF(res);
2080}
2081
2082static void
2083expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2084 const XML_Char *uri)
2085{
2086 treebuilder_handle_namespace(
2087 (TreeBuilderObject*) self->target, 1, prefix, uri
2088 );
2089}
2090
2091static void
2092expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2093{
2094 treebuilder_handle_namespace(
2095 (TreeBuilderObject*) self->target, 0, NULL, NULL
2096 );
2097}
2098
2099static void
2100expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2101{
2102 PyObject* comment;
2103 PyObject* res;
2104
2105 if (self->handle_comment) {
2106 comment = makestring(comment_in, strlen(comment_in));
2107 if (comment) {
2108 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2109 Py_XDECREF(res);
2110 Py_DECREF(comment);
2111 }
2112 }
2113}
2114
2115static void
2116expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2117 const XML_Char* data_in)
2118{
2119 PyObject* target;
2120 PyObject* data;
2121 PyObject* res;
2122
2123 if (self->handle_pi) {
2124 target = makestring(target_in, strlen(target_in));
2125 data = makestring(data_in, strlen(data_in));
2126 if (target && data) {
2127 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2128 Py_XDECREF(res);
2129 Py_DECREF(data);
2130 Py_DECREF(target);
2131 } else {
2132 Py_XDECREF(data);
2133 Py_XDECREF(target);
2134 }
2135 }
2136}
2137
#if defined(Py_USING_UNICODE)
/* expat unknown-encoding handler: build a byte-to-code-point table for
   the named encoding by decoding all 256 byte values with Python's
   codec machinery.  Bytes the codec cannot decode are marked with -1.
   Only encodings that decode the 256 bytes to exactly 256 characters
   are accepted. */
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    PyObject* decoded;
    Py_UNICODE* chars;
    unsigned char bytes[256];
    int n;

    memset(info, 0, sizeof(XML_Encoding));

    /* one of every possible byte value, in order */
    for (n = 0; n < 256; n++)
        bytes[n] = n;

    decoded = PyUnicode_Decode((char*) bytes, 256, name, "replace");
    if (!decoded)
        return XML_STATUS_ERROR;

    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (n = 0; n < 256; n++) {
        if (chars[n] == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[n] = -1;
        else
            info->map[n] = chars[n];
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2176
2177/* -------------------------------------------------------------------- */
2178/* constructor and destructor */
2179
2180static PyObject*
2181xmlparser(PyObject* _self, PyObject* args, PyObject* kw)
2182{
2183 XMLParserObject* self;
2184 /* FIXME: does this need to be static? */
2185 static XML_Memory_Handling_Suite memory_handler;
2186
2187 PyObject* target = NULL;
2188 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002189 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002190 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2191 &target, &encoding))
2192 return NULL;
2193
2194#if defined(USE_PYEXPAT_CAPI)
2195 if (!expat_capi) {
2196 PyErr_SetString(
2197 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2198 );
2199 return NULL;
2200 }
2201#endif
2202
2203 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2204 if (self == NULL)
2205 return NULL;
2206
2207 self->entity = PyDict_New();
2208 if (!self->entity) {
2209 PyObject_Del(self);
2210 return NULL; /* FIXME: cleanup on error */
2211 }
2212
2213 self->names = PyDict_New();
2214 if (!self->names) {
2215 PyObject_Del(self);
2216 return NULL; /* FIXME: cleanup on error */
2217 }
2218
2219 memory_handler.malloc_fcn = PyObject_Malloc;
2220 memory_handler.realloc_fcn = PyObject_Realloc;
2221 memory_handler.free_fcn = PyObject_Free;
2222
2223 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2224 if (!self->parser) {
2225 PyErr_NoMemory();
2226 return NULL; /* FIXME: cleanup on error */
2227 }
2228
2229 /* setup target handlers */
2230 if (!target) {
2231 target = treebuilder_new();
2232 if (!target) {
2233 PyObject_Del(self);
2234 return NULL; /* FIXME: cleanup on error */
2235 }
2236 } else
2237 Py_INCREF(target);
2238 self->target = target;
2239
2240 self->handle_xml = PyObject_GetAttrString(target, "xml");
2241 self->handle_start = PyObject_GetAttrString(target, "start");
2242 self->handle_data = PyObject_GetAttrString(target, "data");
2243 self->handle_end = PyObject_GetAttrString(target, "end");
2244 self->handle_comment = PyObject_GetAttrString(target, "comment");
2245 self->handle_pi = PyObject_GetAttrString(target, "pi");
2246
2247 PyErr_Clear();
2248
2249 /* configure parser */
2250 EXPAT(SetUserData)(self->parser, self);
2251 EXPAT(SetElementHandler)(
2252 self->parser,
2253 (XML_StartElementHandler) expat_start_handler,
2254 (XML_EndElementHandler) expat_end_handler
2255 );
2256 EXPAT(SetDefaultHandlerExpand)(
2257 self->parser,
2258 (XML_DefaultHandler) expat_default_handler
2259 );
2260 EXPAT(SetCharacterDataHandler)(
2261 self->parser,
2262 (XML_CharacterDataHandler) expat_data_handler
2263 );
2264 if (self->handle_comment)
2265 EXPAT(SetCommentHandler)(
2266 self->parser,
2267 (XML_CommentHandler) expat_comment_handler
2268 );
2269 if (self->handle_pi)
2270 EXPAT(SetProcessingInstructionHandler)(
2271 self->parser,
2272 (XML_ProcessingInstructionHandler) expat_pi_handler
2273 );
2274#if defined(Py_USING_UNICODE)
2275 EXPAT(SetUnknownEncodingHandler)(
2276 self->parser,
2277 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2278 );
2279#endif
2280
2281 ALLOC(sizeof(XMLParserObject), "create expatparser");
2282
2283 return (PyObject*) self;
2284}
2285
2286static void
2287xmlparser_dealloc(XMLParserObject* self)
2288{
2289 EXPAT(ParserFree)(self->parser);
2290
2291 Py_XDECREF(self->handle_pi);
2292 Py_XDECREF(self->handle_comment);
2293 Py_XDECREF(self->handle_end);
2294 Py_XDECREF(self->handle_data);
2295 Py_XDECREF(self->handle_start);
2296 Py_XDECREF(self->handle_xml);
2297
2298 Py_DECREF(self->target);
2299 Py_DECREF(self->entity);
2300 Py_DECREF(self->names);
2301
2302 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2303
2304 PyObject_Del(self);
2305}
2306
2307/* -------------------------------------------------------------------- */
2308/* methods (in alphabetical order) */
2309
2310LOCAL(PyObject*)
2311expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2312{
2313 int ok;
2314
2315 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2316
2317 if (PyErr_Occurred())
2318 return NULL;
2319
2320 if (!ok) {
2321 PyErr_Format(
2322 PyExc_SyntaxError, "%s: line %d, column %d",
2323 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2324 EXPAT(GetErrorLineNumber)(self->parser),
2325 EXPAT(GetErrorColumnNumber)(self->parser)
2326 );
2327 return NULL;
2328 }
2329
2330 Py_RETURN_NONE;
2331}
2332
2333static PyObject*
2334xmlparser_close(XMLParserObject* self, PyObject* args)
2335{
2336 /* end feeding data to parser */
2337
2338 PyObject* res;
2339 if (!PyArg_ParseTuple(args, ":close"))
2340 return NULL;
2341
2342 res = expat_parse(self, "", 0, 1);
2343
2344 if (res && TreeBuilder_CheckExact(self->target)) {
2345 Py_DECREF(res);
2346 return treebuilder_done((TreeBuilderObject*) self->target);
2347 }
2348
2349 return res;
2350}
2351
2352static PyObject*
2353xmlparser_feed(XMLParserObject* self, PyObject* args)
2354{
2355 /* feed data to parser */
2356
2357 char* data;
2358 int data_len;
2359 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2360 return NULL;
2361
2362 return expat_parse(self, data, data_len, 0);
2363}
2364
2365static PyObject*
2366xmlparser_parse(XMLParserObject* self, PyObject* args)
2367{
2368 /* (internal) parse until end of input stream */
2369
2370 PyObject* reader;
2371 PyObject* buffer;
2372 PyObject* res;
2373
2374 PyObject* fileobj;
2375 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2376 return NULL;
2377
2378 reader = PyObject_GetAttrString(fileobj, "read");
2379 if (!reader)
2380 return NULL;
2381
2382 /* read from open file object */
2383 for (;;) {
2384
2385 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2386
2387 if (!buffer) {
2388 /* read failed (e.g. due to KeyboardInterrupt) */
2389 Py_DECREF(reader);
2390 return NULL;
2391 }
2392
2393 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2394 Py_DECREF(buffer);
2395 break;
2396 }
2397
2398 res = expat_parse(
2399 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2400 );
2401
2402 Py_DECREF(buffer);
2403
2404 if (!res) {
2405 Py_DECREF(reader);
2406 return NULL;
2407 }
2408 Py_DECREF(res);
2409
2410 }
2411
2412 Py_DECREF(reader);
2413
2414 res = expat_parse(self, "", 0, 1);
2415
2416 if (res && TreeBuilder_CheckExact(self->target)) {
2417 Py_DECREF(res);
2418 return treebuilder_done((TreeBuilderObject*) self->target);
2419 }
2420
2421 return res;
2422}
2423
2424static PyObject*
2425xmlparser_setevents(XMLParserObject* self, PyObject* args)
2426{
2427 /* activate element event reporting */
2428
2429 int i;
2430 TreeBuilderObject* target;
2431
2432 PyObject* events; /* event collector */
2433 PyObject* event_set = Py_None;
2434 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2435 &event_set))
2436 return NULL;
2437
2438 if (!TreeBuilder_CheckExact(self->target)) {
2439 PyErr_SetString(
2440 PyExc_TypeError,
2441 "event handling only supported for cElementTree.Treebuilder "
2442 "targets"
2443 );
2444 return NULL;
2445 }
2446
2447 target = (TreeBuilderObject*) self->target;
2448
2449 Py_INCREF(events);
2450 Py_XDECREF(target->events);
2451 target->events = events;
2452
2453 /* clear out existing events */
2454 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2455 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2456 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2457 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2458
2459 if (event_set == Py_None) {
2460 /* default is "end" only */
2461 target->end_event_obj = PyString_FromString("end");
2462 Py_RETURN_NONE;
2463 }
2464
2465 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2466 goto error;
2467
2468 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2469 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2470 char* event;
2471 if (!PyString_Check(item))
2472 goto error;
2473 event = PyString_AS_STRING(item);
2474 if (strcmp(event, "start") == 0) {
2475 Py_INCREF(item);
2476 target->start_event_obj = item;
2477 } else if (strcmp(event, "end") == 0) {
2478 Py_INCREF(item);
2479 Py_XDECREF(target->end_event_obj);
2480 target->end_event_obj = item;
2481 } else if (strcmp(event, "start-ns") == 0) {
2482 Py_INCREF(item);
2483 Py_XDECREF(target->start_ns_event_obj);
2484 target->start_ns_event_obj = item;
2485 EXPAT(SetNamespaceDeclHandler)(
2486 self->parser,
2487 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2488 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2489 );
2490 } else if (strcmp(event, "end-ns") == 0) {
2491 Py_INCREF(item);
2492 Py_XDECREF(target->end_ns_event_obj);
2493 target->end_ns_event_obj = item;
2494 EXPAT(SetNamespaceDeclHandler)(
2495 self->parser,
2496 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2497 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2498 );
2499 } else {
2500 PyErr_Format(
2501 PyExc_ValueError,
2502 "unknown event '%s'", event
2503 );
2504 return NULL;
2505 }
2506 }
2507
2508 Py_RETURN_NONE;
2509
2510 error:
2511 PyErr_SetString(
2512 PyExc_TypeError,
2513 "invalid event tuple"
2514 );
2515 return NULL;
2516}
2517
2518static PyMethodDef xmlparser_methods[] = {
2519 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2520 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2521 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2522 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2523 {NULL, NULL}
2524};
2525
2526static PyObject*
2527xmlparser_getattr(XMLParserObject* self, char* name)
2528{
2529 PyObject* res;
2530
2531 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2532 if (res)
2533 return res;
2534
2535 PyErr_Clear();
2536
2537 if (strcmp(name, "entity") == 0)
2538 res = self->entity;
2539 else if (strcmp(name, "target") == 0)
2540 res = self->target;
2541 else if (strcmp(name, "version") == 0) {
2542 char buffer[100];
2543 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2544 XML_MINOR_VERSION, XML_MICRO_VERSION);
2545 return PyString_FromString(buffer);
2546 } else {
2547 PyErr_SetString(PyExc_AttributeError, name);
2548 return NULL;
2549 }
2550
2551 Py_INCREF(res);
2552 return res;
2553}
2554
2555statichere PyTypeObject XMLParser_Type = {
2556 PyObject_HEAD_INIT(NULL)
2557 0, "XMLParser", sizeof(XMLParserObject), 0,
2558 /* methods */
2559 (destructor)xmlparser_dealloc, /* tp_dealloc */
2560 0, /* tp_print */
2561 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2562};
2563
2564#endif
2565
2566/* ==================================================================== */
2567/* python module interface */
2568
2569static PyMethodDef _functions[] = {
2570 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2571 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2572 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2573#if defined(USE_EXPAT)
2574 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2575 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2576#endif
2577 {NULL, NULL}
2578};
2579
2580DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002581init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002582{
2583 PyObject* m;
2584 PyObject* g;
2585 char* bootstrap;
2586#if defined(USE_PYEXPAT_CAPI)
2587 struct PyExpat_CAPI* capi;
2588#endif
2589
2590 /* Patch object type */
2591 Element_Type.ob_type = TreeBuilder_Type.ob_type = &PyType_Type;
2592#if defined(USE_EXPAT)
2593 XMLParser_Type.ob_type = &PyType_Type;
2594#endif
2595
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002596 m = Py_InitModule("_elementtree", _functions);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002597 if (m == NULL)
2598 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002599
2600 /* python glue code */
2601
2602 g = PyDict_New();
Neal Norwitz02876df2006-02-07 06:58:52 +00002603 if (g == NULL)
2604 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002605
2606 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2607
2608 bootstrap = (
2609
2610#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2611 "from __future__ import generators\n" /* enable yield under 2.2 */
2612#endif
2613
2614 "from copy import copy, deepcopy\n"
2615
2616 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002617 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002618 "except ImportError:\n"
2619 " import ElementTree\n"
2620 "ET = ElementTree\n"
2621 "del ElementTree\n"
2622
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002623 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002624
2625 "try:\n" /* check if copy works as is */
2626 " copy(cElementTree.Element('x'))\n"
2627 "except:\n"
2628 " def copyelement(elem):\n"
2629 " return elem\n"
2630
2631 "def Comment(text=None):\n" /* public */
2632 " element = cElementTree.Element(ET.Comment)\n"
2633 " element.text = text\n"
2634 " return element\n"
2635 "cElementTree.Comment = Comment\n"
2636
2637 "class ElementTree(ET.ElementTree):\n" /* public */
2638 " def parse(self, source, parser=None):\n"
2639 " if not hasattr(source, 'read'):\n"
2640 " source = open(source, 'rb')\n"
2641 " if parser is not None:\n"
2642 " while 1:\n"
2643 " data = source.read(65536)\n"
2644 " if not data:\n"
2645 " break\n"
2646 " parser.feed(data)\n"
2647 " self._root = parser.close()\n"
2648 " else:\n"
2649 " parser = cElementTree.XMLParser()\n"
2650 " self._root = parser._parse(source)\n"
2651 " return self._root\n"
2652 "cElementTree.ElementTree = ElementTree\n"
2653
2654 "def getiterator(node, tag=None):\n" /* helper */
2655 " if tag == '*':\n"
2656 " tag = None\n"
2657#if (PY_VERSION_HEX < 0x02020000)
2658 " nodes = []\n" /* 2.1 doesn't have yield */
2659 " if tag is None or node.tag == tag:\n"
2660 " nodes.append(node)\n"
2661 " for node in node:\n"
2662 " nodes.extend(getiterator(node, tag))\n"
2663 " return nodes\n"
2664#else
2665 " if tag is None or node.tag == tag:\n"
2666 " yield node\n"
2667 " for node in node:\n"
2668 " for node in getiterator(node, tag):\n"
2669 " yield node\n"
2670#endif
2671
2672 "def parse(source, parser=None):\n" /* public */
2673 " tree = ElementTree()\n"
2674 " tree.parse(source, parser)\n"
2675 " return tree\n"
2676 "cElementTree.parse = parse\n"
2677
2678#if (PY_VERSION_HEX < 0x02020000)
2679 "if hasattr(ET, 'iterparse'):\n"
2680 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2681#else
2682 "class iterparse(object):\n"
2683 " root = None\n"
2684 " def __init__(self, file, events=None):\n"
2685 " if not hasattr(file, 'read'):\n"
2686 " file = open(file, 'rb')\n"
2687 " self._file = file\n"
2688 " self._events = events\n"
2689 " def __iter__(self):\n"
2690 " events = []\n"
2691 " b = cElementTree.TreeBuilder()\n"
2692 " p = cElementTree.XMLParser(b)\n"
2693 " p._setevents(events, self._events)\n"
2694 " while 1:\n"
2695 " data = self._file.read(16384)\n"
2696 " if not data:\n"
2697 " break\n"
2698 " p.feed(data)\n"
2699 " for event in events:\n"
2700 " yield event\n"
2701 " del events[:]\n"
2702 " root = p.close()\n"
2703 " for event in events:\n"
2704 " yield event\n"
2705 " self.root = root\n"
2706 "cElementTree.iterparse = iterparse\n"
2707#endif
2708
2709 "def PI(target, text=None):\n" /* public */
2710 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2711 " element.text = target\n"
2712 " if text:\n"
2713 " element.text = element.text + ' ' + text\n"
2714 " return element\n"
2715
2716 " elem = cElementTree.Element(ET.PI)\n"
2717 " elem.text = text\n"
2718 " return elem\n"
2719 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2720
2721 "def XML(text):\n" /* public */
2722 " parser = cElementTree.XMLParser()\n"
2723 " parser.feed(text)\n"
2724 " return parser.close()\n"
2725 "cElementTree.XML = cElementTree.fromstring = XML\n"
2726
2727 "def XMLID(text):\n" /* public */
2728 " tree = XML(text)\n"
2729 " ids = {}\n"
2730 " for elem in tree.getiterator():\n"
2731 " id = elem.get('id')\n"
2732 " if id:\n"
2733 " ids[id] = elem\n"
2734 " return tree, ids\n"
2735 "cElementTree.XMLID = XMLID\n"
2736
2737 "cElementTree.dump = ET.dump\n"
2738 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2739 "cElementTree.iselement = ET.iselement\n"
2740 "cElementTree.QName = ET.QName\n"
2741 "cElementTree.tostring = ET.tostring\n"
2742 "cElementTree.VERSION = '" VERSION "'\n"
2743 "cElementTree.__version__ = '" VERSION "'\n"
2744 "cElementTree.XMLParserError = SyntaxError\n"
2745
2746 );
2747
2748 PyRun_String(bootstrap, Py_file_input, g, NULL);
2749
2750 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2751
2752 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2753 if (elementtree_copyelement_obj) {
2754 /* reduce hack needed; enable reduce method */
2755 PyMethodDef* mp;
2756 for (mp = element_methods; mp->ml_name; mp++)
2757 if (mp->ml_meth == (PyCFunction) element_reduce) {
2758 mp->ml_name = "__reduce__";
2759 break;
2760 }
2761 } else
2762 PyErr_Clear();
2763 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2764 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2765
2766#if defined(USE_PYEXPAT_CAPI)
2767 /* link against pyexpat, if possible */
2768 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2769 if (capi &&
2770 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2771 capi->size <= sizeof(*expat_capi) &&
2772 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2773 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2774 capi->MICRO_VERSION == XML_MICRO_VERSION)
2775 expat_capi = capi;
2776 else
2777 expat_capi = NULL;
2778#endif
2779
2780}