blob: 72c157d3698b8a87f39f2715d40789668c079100 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000036 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000038 *
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000039 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000041 *
42 * info@pythonware.com
43 * http://www.pythonware.com
44 */
45
Fredrik Lundh6d52b552005-12-16 22:06:43 +000046/* Licensed to PSF under a Contributor Agreement. */
47/* See http://www.python.org/2.4/license for licensing details. */
48
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000049#include "Python.h"
50
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000051#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
53/* -------------------------------------------------------------------- */
54/* configuration */
55
56/* Leave defined to include the expat-based XMLParser type */
57#define USE_EXPAT
58
/* Define to do all expat calls via pyexpat's embedded expat library */
60/* #define USE_PYEXPAT_CAPI */
61
62/* An element can hold this many children without extra memory
63 allocations. */
64#define STATIC_CHILDREN 4
65
/* For best performance, choose a value so that 80-90% of all nodes
67 have no more than the given number of children. Set this to zero
68 to minimize the size of the element structure itself (this only
69 helps if you have lots of leaf nodes with attributes). */
70
71/* Also note that pymalloc always allocates blocks in multiples of
72 eight bytes. For the current version of cElementTree, this means
73 that the number of children should be an even number, at least on
74 32-bit platforms. */
75
76/* -------------------------------------------------------------------- */
77
/* Allocation tracing hooks.  Change "#if 0" to "#if 1" to print a
   running byte count every time ALLOC or RELEASE is used; otherwise
   both macros expand to nothing.  The macro arguments are
   parenthesized, and the size is converted to int explicitly so that
   passing a sizeof expression matches the int counter and the %8d
   format. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += (int) (size); printf("%8d - %s\n", memory, (comment)); } while (0)
#define RELEASE(size, comment)\
do { memory -= (int) (size); printf("%8d - %s\n", memory, (comment)); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
88
/* compiler tweaks: LOCAL(type) introduces a file-private function
   returning type; when built with MSVC it additionally asks for
   inlining and the fastcall calling convention */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
95
/* the lowest bit of a text/tail pointer is used to store a 'join'
   flag.  every use of text and tail as object pointers must therefore
   be wrapped in JOIN_OBJ, which masks the flag off again.  see the
   comments in the ElementObject definition for more info.

   JOIN_OBJ(p)       - object pointer with the flag bit cleared
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - pointer to the same object carrying the flag */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t) 1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
103
104/* glue functions (see the init function for details) */
105static PyObject* elementtree_copyelement_obj;
106static PyObject* elementtree_deepcopy_obj;
107static PyObject* elementtree_getiterator_obj;
108static PyObject* elementpath_obj;
109
110/* helpers */
111
112LOCAL(PyObject*)
113deepcopy(PyObject* object, PyObject* memo)
114{
115 /* do a deep copy of the given object */
116
117 PyObject* args;
118 PyObject* result;
119
120 if (!elementtree_deepcopy_obj) {
121 PyErr_SetString(
122 PyExc_RuntimeError,
123 "deepcopy helper not found"
124 );
125 return NULL;
126 }
127
128 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000129 if (!args)
130 return NULL;
131
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
133 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
134
135 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
136
137 Py_DECREF(args);
138
139 return result;
140}
141
142LOCAL(PyObject*)
143list_join(PyObject* list)
144{
145 /* join list elements (destroying the list in the process) */
146
147 PyObject* joiner;
148 PyObject* function;
149 PyObject* args;
150 PyObject* result;
151
152 switch (PyList_GET_SIZE(list)) {
153 case 0:
154 Py_DECREF(list);
155 return PyString_FromString("");
156 case 1:
157 result = PyList_GET_ITEM(list, 0);
158 Py_INCREF(result);
159 Py_DECREF(list);
160 return result;
161 }
162
163 /* two or more elements: slice out a suitable separator from the
164 first member, and use that to join the entire list */
165
166 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
167 if (!joiner)
168 return NULL;
169
170 function = PyObject_GetAttrString(joiner, "join");
171 if (!function) {
172 Py_DECREF(joiner);
173 return NULL;
174 }
175
176 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000177 if (!args)
178 return NULL;
179
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000180 PyTuple_SET_ITEM(args, 0, list);
181
182 result = PyObject_CallObject(function, args);
183
184 Py_DECREF(args); /* also removes list */
185 Py_DECREF(function);
186 Py_DECREF(joiner);
187
188 return result;
189}
190
191#if (PY_VERSION_HEX < 0x02020000)
192LOCAL(int)
193PyDict_Update(PyObject* dict, PyObject* other)
194{
195 /* PyDict_Update emulation for 2.1 and earlier */
196
197 PyObject* res;
198
199 res = PyObject_CallMethod(dict, "update", "O", other);
200 if (!res)
201 return -1;
202
203 Py_DECREF(res);
204 return 0;
205}
206#endif
207
208/* -------------------------------------------------------------------- */
209/* the element type */
210
211typedef struct {
212
213 /* attributes (a dictionary object), or None if no attributes */
214 PyObject* attrib;
215
216 /* child elements */
217 int length; /* actual number of items */
218 int allocated; /* allocated items */
219
220 /* this either points to _children or to a malloced buffer */
221 PyObject* *children;
222
223 PyObject* _children[STATIC_CHILDREN];
224
225} ElementObjectExtra;
226
227typedef struct {
228 PyObject_HEAD
229
230 /* element tag (a string). */
231 PyObject* tag;
232
233 /* text before first child. note that this is a tagged pointer;
234 use JOIN_OBJ to get the object pointer. the join flag is used
235 to distinguish lists created by the tree builder from lists
236 assigned to the attribute by application code; the former
237 should be joined before being returned to the user, the latter
238 should be left intact. */
239 PyObject* text;
240
241 /* text after this element, in parent. note that this is a tagged
242 pointer; use JOIN_OBJ to get the object pointer. */
243 PyObject* tail;
244
245 ElementObjectExtra* extra;
246
247} ElementObject;
248
Neal Norwitz227b5332006-03-22 09:28:35 +0000249static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000250
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000251#define Element_CheckExact(op) (Py_Type(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000252
253/* -------------------------------------------------------------------- */
254/* element constructor and destructor */
255
256LOCAL(int)
257element_new_extra(ElementObject* self, PyObject* attrib)
258{
259 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
260 if (!self->extra)
261 return -1;
262
263 if (!attrib)
264 attrib = Py_None;
265
266 Py_INCREF(attrib);
267 self->extra->attrib = attrib;
268
269 self->extra->length = 0;
270 self->extra->allocated = STATIC_CHILDREN;
271 self->extra->children = self->extra->_children;
272
273 return 0;
274}
275
276LOCAL(void)
277element_dealloc_extra(ElementObject* self)
278{
279 int i;
280
281 Py_DECREF(self->extra->attrib);
282
283 for (i = 0; i < self->extra->length; i++)
284 Py_DECREF(self->extra->children[i]);
285
286 if (self->extra->children != self->extra->_children)
287 PyObject_Free(self->extra->children);
288
289 PyObject_Free(self->extra);
290}
291
292LOCAL(PyObject*)
293element_new(PyObject* tag, PyObject* attrib)
294{
295 ElementObject* self;
296
297 self = PyObject_New(ElementObject, &Element_Type);
298 if (self == NULL)
299 return NULL;
300
301 /* use None for empty dictionaries */
302 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
303 attrib = Py_None;
304
305 self->extra = NULL;
306
307 if (attrib != Py_None) {
308
Thomas Wouters477c8d52006-05-27 19:21:47 +0000309 if (element_new_extra(self, attrib) < 0) {
310 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000312 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000313
314 self->extra->length = 0;
315 self->extra->allocated = STATIC_CHILDREN;
316 self->extra->children = self->extra->_children;
317
318 }
319
320 Py_INCREF(tag);
321 self->tag = tag;
322
323 Py_INCREF(Py_None);
324 self->text = Py_None;
325
326 Py_INCREF(Py_None);
327 self->tail = Py_None;
328
329 ALLOC(sizeof(ElementObject), "create element");
330
331 return (PyObject*) self;
332}
333
334LOCAL(int)
335element_resize(ElementObject* self, int extra)
336{
337 int size;
338 PyObject* *children;
339
340 /* make sure self->children can hold the given number of extra
341 elements. set an exception and return -1 if allocation failed */
342
343 if (!self->extra)
344 element_new_extra(self, NULL);
345
346 size = self->extra->length + extra;
347
348 if (size > self->extra->allocated) {
349 /* use Python 2.4's list growth strategy */
350 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
351 if (self->extra->children != self->extra->_children) {
352 children = PyObject_Realloc(self->extra->children,
353 size * sizeof(PyObject*));
354 if (!children)
355 goto nomemory;
356 } else {
357 children = PyObject_Malloc(size * sizeof(PyObject*));
358 if (!children)
359 goto nomemory;
360 /* copy existing children from static area to malloc buffer */
361 memcpy(children, self->extra->children,
362 self->extra->length * sizeof(PyObject*));
363 }
364 self->extra->children = children;
365 self->extra->allocated = size;
366 }
367
368 return 0;
369
370 nomemory:
371 PyErr_NoMemory();
372 return -1;
373}
374
375LOCAL(int)
376element_add_subelement(ElementObject* self, PyObject* element)
377{
378 /* add a child element to a parent */
379
380 if (element_resize(self, 1) < 0)
381 return -1;
382
383 Py_INCREF(element);
384 self->extra->children[self->extra->length] = element;
385
386 self->extra->length++;
387
388 return 0;
389}
390
391LOCAL(PyObject*)
392element_get_attrib(ElementObject* self)
393{
394 /* return borrowed reference to attrib dictionary */
395 /* note: this function assumes that the extra section exists */
396
397 PyObject* res = self->extra->attrib;
398
399 if (res == Py_None) {
400 /* create missing dictionary */
401 res = PyDict_New();
402 if (!res)
403 return NULL;
404 self->extra->attrib = res;
405 }
406
407 return res;
408}
409
410LOCAL(PyObject*)
411element_get_text(ElementObject* self)
412{
413 /* return borrowed reference to text attribute */
414
415 PyObject* res = self->text;
416
417 if (JOIN_GET(res)) {
418 res = JOIN_OBJ(res);
419 if (PyList_CheckExact(res)) {
420 res = list_join(res);
421 if (!res)
422 return NULL;
423 self->text = res;
424 }
425 }
426
427 return res;
428}
429
430LOCAL(PyObject*)
431element_get_tail(ElementObject* self)
432{
433 /* return borrowed reference to text attribute */
434
435 PyObject* res = self->tail;
436
437 if (JOIN_GET(res)) {
438 res = JOIN_OBJ(res);
439 if (PyList_CheckExact(res)) {
440 res = list_join(res);
441 if (!res)
442 return NULL;
443 self->tail = res;
444 }
445 }
446
447 return res;
448}
449
450static PyObject*
451element(PyObject* self, PyObject* args, PyObject* kw)
452{
453 PyObject* elem;
454
455 PyObject* tag;
456 PyObject* attrib = NULL;
457 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
458 &PyDict_Type, &attrib))
459 return NULL;
460
461 if (attrib || kw) {
462 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
463 if (!attrib)
464 return NULL;
465 if (kw)
466 PyDict_Update(attrib, kw);
467 } else {
468 Py_INCREF(Py_None);
469 attrib = Py_None;
470 }
471
472 elem = element_new(tag, attrib);
473
474 Py_DECREF(attrib);
475
476 return elem;
477}
478
479static PyObject*
480subelement(PyObject* self, PyObject* args, PyObject* kw)
481{
482 PyObject* elem;
483
484 ElementObject* parent;
485 PyObject* tag;
486 PyObject* attrib = NULL;
487 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
488 &Element_Type, &parent, &tag,
489 &PyDict_Type, &attrib))
490 return NULL;
491
492 if (attrib || kw) {
493 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
494 if (!attrib)
495 return NULL;
496 if (kw)
497 PyDict_Update(attrib, kw);
498 } else {
499 Py_INCREF(Py_None);
500 attrib = Py_None;
501 }
502
503 elem = element_new(tag, attrib);
504
505 Py_DECREF(attrib);
506
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000507 if (element_add_subelement(parent, elem) < 0) {
508 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000509 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000510 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000511
512 return elem;
513}
514
515static void
516element_dealloc(ElementObject* self)
517{
518 if (self->extra)
519 element_dealloc_extra(self);
520
521 /* discard attributes */
522 Py_DECREF(self->tag);
523 Py_DECREF(JOIN_OBJ(self->text));
524 Py_DECREF(JOIN_OBJ(self->tail));
525
526 RELEASE(sizeof(ElementObject), "destroy element");
527
528 PyObject_Del(self);
529}
530
531/* -------------------------------------------------------------------- */
532/* methods (in alphabetical order) */
533
534static PyObject*
535element_append(ElementObject* self, PyObject* args)
536{
537 PyObject* element;
538 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
539 return NULL;
540
541 if (element_add_subelement(self, element) < 0)
542 return NULL;
543
544 Py_RETURN_NONE;
545}
546
547static PyObject*
548element_clear(ElementObject* self, PyObject* args)
549{
550 if (!PyArg_ParseTuple(args, ":clear"))
551 return NULL;
552
553 if (self->extra) {
554 element_dealloc_extra(self);
555 self->extra = NULL;
556 }
557
558 Py_INCREF(Py_None);
559 Py_DECREF(JOIN_OBJ(self->text));
560 self->text = Py_None;
561
562 Py_INCREF(Py_None);
563 Py_DECREF(JOIN_OBJ(self->tail));
564 self->tail = Py_None;
565
566 Py_RETURN_NONE;
567}
568
569static PyObject*
570element_copy(ElementObject* self, PyObject* args)
571{
572 int i;
573 ElementObject* element;
574
575 if (!PyArg_ParseTuple(args, ":__copy__"))
576 return NULL;
577
578 element = (ElementObject*) element_new(
579 self->tag, (self->extra) ? self->extra->attrib : Py_None
580 );
581 if (!element)
582 return NULL;
583
584 Py_DECREF(JOIN_OBJ(element->text));
585 element->text = self->text;
586 Py_INCREF(JOIN_OBJ(element->text));
587
588 Py_DECREF(JOIN_OBJ(element->tail));
589 element->tail = self->tail;
590 Py_INCREF(JOIN_OBJ(element->tail));
591
592 if (self->extra) {
593
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000594 if (element_resize(element, self->extra->length) < 0) {
595 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000596 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000597 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000598
599 for (i = 0; i < self->extra->length; i++) {
600 Py_INCREF(self->extra->children[i]);
601 element->extra->children[i] = self->extra->children[i];
602 }
603
604 element->extra->length = self->extra->length;
605
606 }
607
608 return (PyObject*) element;
609}
610
611static PyObject*
612element_deepcopy(ElementObject* self, PyObject* args)
613{
614 int i;
615 ElementObject* element;
616 PyObject* tag;
617 PyObject* attrib;
618 PyObject* text;
619 PyObject* tail;
620 PyObject* id;
621
622 PyObject* memo;
623 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
624 return NULL;
625
626 tag = deepcopy(self->tag, memo);
627 if (!tag)
628 return NULL;
629
630 if (self->extra) {
631 attrib = deepcopy(self->extra->attrib, memo);
632 if (!attrib) {
633 Py_DECREF(tag);
634 return NULL;
635 }
636 } else {
637 Py_INCREF(Py_None);
638 attrib = Py_None;
639 }
640
641 element = (ElementObject*) element_new(tag, attrib);
642
643 Py_DECREF(tag);
644 Py_DECREF(attrib);
645
646 if (!element)
647 return NULL;
648
649 text = deepcopy(JOIN_OBJ(self->text), memo);
650 if (!text)
651 goto error;
652 Py_DECREF(element->text);
653 element->text = JOIN_SET(text, JOIN_GET(self->text));
654
655 tail = deepcopy(JOIN_OBJ(self->tail), memo);
656 if (!tail)
657 goto error;
658 Py_DECREF(element->tail);
659 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
660
661 if (self->extra) {
662
663 if (element_resize(element, self->extra->length) < 0)
664 goto error;
665
666 for (i = 0; i < self->extra->length; i++) {
667 PyObject* child = deepcopy(self->extra->children[i], memo);
668 if (!child) {
669 element->extra->length = i;
670 goto error;
671 }
672 element->extra->children[i] = child;
673 }
674
675 element->extra->length = self->extra->length;
676
677 }
678
679 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000680 id = PyLong_FromLong((Py_uintptr_t) self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000681
682 i = PyDict_SetItem(memo, id, (PyObject*) element);
683
684 Py_DECREF(id);
685
686 if (i < 0)
687 goto error;
688
689 return (PyObject*) element;
690
691 error:
692 Py_DECREF(element);
693 return NULL;
694}
695
696LOCAL(int)
697checkpath(PyObject* tag)
698{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000699 Py_ssize_t i;
700 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701
702 /* check if a tag contains an xpath character */
703
704#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
705
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000706 if (PyUnicode_Check(tag)) {
707 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
708 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
709 if (p[i] == '{')
710 check = 0;
711 else if (p[i] == '}')
712 check = 1;
713 else if (check && PATHCHAR(p[i]))
714 return 1;
715 }
716 return 0;
717 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000718 if (PyString_Check(tag)) {
719 char *p = PyString_AS_STRING(tag);
720 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
721 if (p[i] == '{')
722 check = 0;
723 else if (p[i] == '}')
724 check = 1;
725 else if (check && PATHCHAR(p[i]))
726 return 1;
727 }
728 return 0;
729 }
730
731 return 1; /* unknown type; might be path expression */
732}
733
734static PyObject*
735element_find(ElementObject* self, PyObject* args)
736{
737 int i;
738
739 PyObject* tag;
740 if (!PyArg_ParseTuple(args, "O:find", &tag))
741 return NULL;
742
743 if (checkpath(tag))
744 return PyObject_CallMethod(
745 elementpath_obj, "find", "OO", self, tag
746 );
747
748 if (!self->extra)
749 Py_RETURN_NONE;
750
751 for (i = 0; i < self->extra->length; i++) {
752 PyObject* item = self->extra->children[i];
753 if (Element_CheckExact(item) &&
754 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
755 Py_INCREF(item);
756 return item;
757 }
758 }
759
760 Py_RETURN_NONE;
761}
762
763static PyObject*
764element_findtext(ElementObject* self, PyObject* args)
765{
766 int i;
767
768 PyObject* tag;
769 PyObject* default_value = Py_None;
770 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
771 return NULL;
772
773 if (checkpath(tag))
774 return PyObject_CallMethod(
775 elementpath_obj, "findtext", "OOO", self, tag, default_value
776 );
777
778 if (!self->extra) {
779 Py_INCREF(default_value);
780 return default_value;
781 }
782
783 for (i = 0; i < self->extra->length; i++) {
784 ElementObject* item = (ElementObject*) self->extra->children[i];
785 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
786 PyObject* text = element_get_text(item);
787 if (text == Py_None)
788 return PyString_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000789 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000790 return text;
791 }
792 }
793
794 Py_INCREF(default_value);
795 return default_value;
796}
797
798static PyObject*
799element_findall(ElementObject* self, PyObject* args)
800{
801 int i;
802 PyObject* out;
803
804 PyObject* tag;
805 if (!PyArg_ParseTuple(args, "O:findall", &tag))
806 return NULL;
807
808 if (checkpath(tag))
809 return PyObject_CallMethod(
810 elementpath_obj, "findall", "OO", self, tag
811 );
812
813 out = PyList_New(0);
814 if (!out)
815 return NULL;
816
817 if (!self->extra)
818 return out;
819
820 for (i = 0; i < self->extra->length; i++) {
821 PyObject* item = self->extra->children[i];
822 if (Element_CheckExact(item) &&
823 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
824 if (PyList_Append(out, item) < 0) {
825 Py_DECREF(out);
826 return NULL;
827 }
828 }
829 }
830
831 return out;
832}
833
834static PyObject*
835element_get(ElementObject* self, PyObject* args)
836{
837 PyObject* value;
838
839 PyObject* key;
840 PyObject* default_value = Py_None;
841 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
842 return NULL;
843
844 if (!self->extra || self->extra->attrib == Py_None)
845 value = default_value;
846 else {
847 value = PyDict_GetItem(self->extra->attrib, key);
848 if (!value)
849 value = default_value;
850 }
851
852 Py_INCREF(value);
853 return value;
854}
855
856static PyObject*
857element_getchildren(ElementObject* self, PyObject* args)
858{
859 int i;
860 PyObject* list;
861
862 if (!PyArg_ParseTuple(args, ":getchildren"))
863 return NULL;
864
865 if (!self->extra)
866 return PyList_New(0);
867
868 list = PyList_New(self->extra->length);
869 if (!list)
870 return NULL;
871
872 for (i = 0; i < self->extra->length; i++) {
873 PyObject* item = self->extra->children[i];
874 Py_INCREF(item);
875 PyList_SET_ITEM(list, i, item);
876 }
877
878 return list;
879}
880
881static PyObject*
882element_getiterator(ElementObject* self, PyObject* args)
883{
884 PyObject* result;
885
886 PyObject* tag = Py_None;
887 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
888 return NULL;
889
890 if (!elementtree_getiterator_obj) {
891 PyErr_SetString(
892 PyExc_RuntimeError,
893 "getiterator helper not found"
894 );
895 return NULL;
896 }
897
898 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000899 if (!args)
900 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000901
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000902 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
903 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
904
905 result = PyObject_CallObject(elementtree_getiterator_obj, args);
906
907 Py_DECREF(args);
908
909 return result;
910}
911
912static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000913element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000914{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000915 ElementObject* self = (ElementObject*) self_;
916
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000917 if (!self->extra || index < 0 || index >= self->extra->length) {
918 PyErr_SetString(
919 PyExc_IndexError,
920 "child index out of range"
921 );
922 return NULL;
923 }
924
925 Py_INCREF(self->extra->children[index]);
926 return self->extra->children[index];
927}
928
929static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000930element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000931{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000932 ElementObject* self = (ElementObject*) self_;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000933 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000934 PyObject* list;
935
936 if (!self->extra)
937 return PyList_New(0);
938
939 /* standard clamping */
940 if (start < 0)
941 start = 0;
942 if (end < 0)
943 end = 0;
944 if (end > self->extra->length)
945 end = self->extra->length;
946 if (start > end)
947 start = end;
948
949 list = PyList_New(end - start);
950 if (!list)
951 return NULL;
952
953 for (i = start; i < end; i++) {
954 PyObject* item = self->extra->children[i];
955 Py_INCREF(item);
956 PyList_SET_ITEM(list, i - start, item);
957 }
958
959 return list;
960}
961
962static PyObject*
963element_insert(ElementObject* self, PyObject* args)
964{
965 int i;
966
967 int index;
968 PyObject* element;
969 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
970 &Element_Type, &element))
971 return NULL;
972
973 if (!self->extra)
974 element_new_extra(self, NULL);
975
976 if (index < 0)
977 index = 0;
978 if (index > self->extra->length)
979 index = self->extra->length;
980
981 if (element_resize(self, 1) < 0)
982 return NULL;
983
984 for (i = self->extra->length; i > index; i--)
985 self->extra->children[i] = self->extra->children[i-1];
986
987 Py_INCREF(element);
988 self->extra->children[index] = element;
989
990 self->extra->length++;
991
992 Py_RETURN_NONE;
993}
994
995static PyObject*
996element_items(ElementObject* self, PyObject* args)
997{
998 if (!PyArg_ParseTuple(args, ":items"))
999 return NULL;
1000
1001 if (!self->extra || self->extra->attrib == Py_None)
1002 return PyList_New(0);
1003
1004 return PyDict_Items(self->extra->attrib);
1005}
1006
1007static PyObject*
1008element_keys(ElementObject* self, PyObject* args)
1009{
1010 if (!PyArg_ParseTuple(args, ":keys"))
1011 return NULL;
1012
1013 if (!self->extra || self->extra->attrib == Py_None)
1014 return PyList_New(0);
1015
1016 return PyDict_Keys(self->extra->attrib);
1017}
1018
Martin v. Löwis18e16552006-02-15 17:27:45 +00001019static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001020element_length(ElementObject* self)
1021{
1022 if (!self->extra)
1023 return 0;
1024
1025 return self->extra->length;
1026}
1027
1028static PyObject*
1029element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1030{
1031 PyObject* elem;
1032
1033 PyObject* tag;
1034 PyObject* attrib;
1035 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1036 return NULL;
1037
1038 attrib = PyDict_Copy(attrib);
1039 if (!attrib)
1040 return NULL;
1041
1042 elem = element_new(tag, attrib);
1043
1044 Py_DECREF(attrib);
1045
1046 return elem;
1047}
1048
1049static PyObject*
1050element_reduce(ElementObject* self, PyObject* args)
1051{
1052 if (!PyArg_ParseTuple(args, ":__reduce__"))
1053 return NULL;
1054
1055 /* Hack alert: This method is used to work around a __copy__
1056 problem on certain 2.3 and 2.4 versions. To save time and
1057 simplify the code, we create the copy in here, and use a dummy
1058 copyelement helper to trick the copy module into doing the
1059 right thing. */
1060
1061 if (!elementtree_copyelement_obj) {
1062 PyErr_SetString(
1063 PyExc_RuntimeError,
1064 "copyelement helper not found"
1065 );
1066 return NULL;
1067 }
1068
1069 return Py_BuildValue(
1070 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1071 );
1072}
1073
1074static PyObject*
1075element_remove(ElementObject* self, PyObject* args)
1076{
1077 int i;
1078
1079 PyObject* element;
1080 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1081 return NULL;
1082
1083 if (!self->extra) {
1084 /* element has no children, so raise exception */
1085 PyErr_SetString(
1086 PyExc_ValueError,
1087 "list.remove(x): x not in list"
1088 );
1089 return NULL;
1090 }
1091
1092 for (i = 0; i < self->extra->length; i++) {
1093 if (self->extra->children[i] == element)
1094 break;
1095 if (PyObject_Compare(self->extra->children[i], element) == 0)
1096 break;
1097 }
1098
1099 if (i == self->extra->length) {
1100 /* element is not in children, so raise exception */
1101 PyErr_SetString(
1102 PyExc_ValueError,
1103 "list.remove(x): x not in list"
1104 );
1105 return NULL;
1106 }
1107
1108 Py_DECREF(self->extra->children[i]);
1109
1110 self->extra->length--;
1111
1112 for (; i < self->extra->length; i++)
1113 self->extra->children[i] = self->extra->children[i+1];
1114
1115 Py_RETURN_NONE;
1116}
1117
1118static PyObject*
1119element_repr(ElementObject* self)
1120{
Walter Dörwald7569dfe2007-05-19 21:49:49 +00001121 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001122}
1123
1124static PyObject*
1125element_set(ElementObject* self, PyObject* args)
1126{
1127 PyObject* attrib;
1128
1129 PyObject* key;
1130 PyObject* value;
1131 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1132 return NULL;
1133
1134 if (!self->extra)
1135 element_new_extra(self, NULL);
1136
1137 attrib = element_get_attrib(self);
1138 if (!attrib)
1139 return NULL;
1140
1141 if (PyDict_SetItem(attrib, key, value) < 0)
1142 return NULL;
1143
1144 Py_RETURN_NONE;
1145}
1146
1147static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001148element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001149{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001150 ElementObject* self = (ElementObject*) self_;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001151 Py_ssize_t i, new, old;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001152 PyObject* recycle = NULL;
1153
1154 if (!self->extra)
1155 element_new_extra(self, NULL);
1156
1157 /* standard clamping */
1158 if (start < 0)
1159 start = 0;
1160 if (end < 0)
1161 end = 0;
1162 if (end > self->extra->length)
1163 end = self->extra->length;
1164 if (start > end)
1165 start = end;
1166
1167 old = end - start;
1168
1169 if (item == NULL)
1170 new = 0;
1171 else if (PyList_CheckExact(item)) {
1172 new = PyList_GET_SIZE(item);
1173 } else {
1174 /* FIXME: support arbitrary sequences? */
1175 PyErr_Format(
1176 PyExc_TypeError,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001177 "expected list, not \"%.200s\"", Py_Type(item)->tp_name
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178 );
1179 return -1;
1180 }
1181
1182 if (old > 0) {
1183 /* to avoid recursive calls to this method (via decref), move
1184 old items to the recycle bin here, and get rid of them when
1185 we're done modifying the element */
1186 recycle = PyList_New(old);
1187 for (i = 0; i < old; i++)
1188 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1189 }
1190
1191 if (new < old) {
1192 /* delete slice */
1193 for (i = end; i < self->extra->length; i++)
1194 self->extra->children[i + new - old] = self->extra->children[i];
1195 } else if (new > old) {
1196 /* insert slice */
1197 if (element_resize(self, new - old) < 0)
1198 return -1;
1199 for (i = self->extra->length-1; i >= end; i--)
1200 self->extra->children[i + new - old] = self->extra->children[i];
1201 }
1202
1203 /* replace the slice */
1204 for (i = 0; i < new; i++) {
1205 PyObject* element = PyList_GET_ITEM(item, i);
1206 Py_INCREF(element);
1207 self->extra->children[i + start] = element;
1208 }
1209
1210 self->extra->length += new - old;
1211
1212 /* discard the recycle bin, and everything in it */
1213 Py_XDECREF(recycle);
1214
1215 return 0;
1216}
1217
1218static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001219element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001220{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001221 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001222 int i;
1223 PyObject* old;
1224
1225 if (!self->extra || index < 0 || index >= self->extra->length) {
1226 PyErr_SetString(
1227 PyExc_IndexError,
1228 "child assignment index out of range");
1229 return -1;
1230 }
1231
1232 old = self->extra->children[index];
1233
1234 if (item) {
1235 Py_INCREF(item);
1236 self->extra->children[index] = item;
1237 } else {
1238 self->extra->length--;
1239 for (i = index; i < self->extra->length; i++)
1240 self->extra->children[i] = self->extra->children[i+1];
1241 }
1242
1243 Py_DECREF(old);
1244
1245 return 0;
1246}
1247
1248static PyMethodDef element_methods[] = {
1249
1250 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1251
1252 {"get", (PyCFunction) element_get, METH_VARARGS},
1253 {"set", (PyCFunction) element_set, METH_VARARGS},
1254
1255 {"find", (PyCFunction) element_find, METH_VARARGS},
1256 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1257 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1258
1259 {"append", (PyCFunction) element_append, METH_VARARGS},
1260 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1261 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1262
1263 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1264 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1265
1266 {"items", (PyCFunction) element_items, METH_VARARGS},
1267 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1268
1269 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1270
1271 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1272 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1273
1274 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1275 C objects correctly, so we have to fake it using a __reduce__-
1276 based hack (see the element_reduce implementation above for
1277 details). */
1278
1279 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1280 using a runtime test to figure out if we need to fake things
1281 or now (see the init code below). The following entry is
1282 enabled only if the hack is needed. */
1283
1284 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1285
1286 {NULL, NULL}
1287};
1288
1289static PyObject*
1290element_getattr(ElementObject* self, char* name)
1291{
1292 PyObject* res;
1293
1294 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1295 if (res)
1296 return res;
1297
1298 PyErr_Clear();
1299
1300 if (strcmp(name, "tag") == 0)
1301 res = self->tag;
1302 else if (strcmp(name, "text") == 0)
1303 res = element_get_text(self);
1304 else if (strcmp(name, "tail") == 0) {
1305 res = element_get_tail(self);
1306 } else if (strcmp(name, "attrib") == 0) {
1307 if (!self->extra)
1308 element_new_extra(self, NULL);
1309 res = element_get_attrib(self);
1310 } else {
1311 PyErr_SetString(PyExc_AttributeError, name);
1312 return NULL;
1313 }
1314
1315 if (!res)
1316 return NULL;
1317
1318 Py_INCREF(res);
1319 return res;
1320}
1321
1322static int
1323element_setattr(ElementObject* self, const char* name, PyObject* value)
1324{
1325 if (value == NULL) {
1326 PyErr_SetString(
1327 PyExc_AttributeError,
1328 "can't delete element attributes"
1329 );
1330 return -1;
1331 }
1332
1333 if (strcmp(name, "tag") == 0) {
1334 Py_DECREF(self->tag);
1335 self->tag = value;
1336 Py_INCREF(self->tag);
1337 } else if (strcmp(name, "text") == 0) {
1338 Py_DECREF(JOIN_OBJ(self->text));
1339 self->text = value;
1340 Py_INCREF(self->text);
1341 } else if (strcmp(name, "tail") == 0) {
1342 Py_DECREF(JOIN_OBJ(self->tail));
1343 self->tail = value;
1344 Py_INCREF(self->tail);
1345 } else if (strcmp(name, "attrib") == 0) {
1346 if (!self->extra)
1347 element_new_extra(self, NULL);
1348 Py_DECREF(self->extra->attrib);
1349 self->extra->attrib = value;
1350 Py_INCREF(self->extra->attrib);
1351 } else {
1352 PyErr_SetString(PyExc_AttributeError, name);
1353 return -1;
1354 }
1355
1356 return 0;
1357}
1358
1359static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001360 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001361 0, /* sq_concat */
1362 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001363 element_getitem,
1364 element_getslice,
1365 element_setitem,
1366 element_setslice,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001367};
1368
Neal Norwitz227b5332006-03-22 09:28:35 +00001369static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001370 PyVarObject_HEAD_INIT(NULL, 0)
1371 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001372 /* methods */
1373 (destructor)element_dealloc, /* tp_dealloc */
1374 0, /* tp_print */
1375 (getattrfunc)element_getattr, /* tp_getattr */
1376 (setattrfunc)element_setattr, /* tp_setattr */
1377 0, /* tp_compare */
1378 (reprfunc)element_repr, /* tp_repr */
1379 0, /* tp_as_number */
1380 &element_as_sequence, /* tp_as_sequence */
1381};
1382
1383/* ==================================================================== */
1384/* the tree builder type */
1385
1386typedef struct {
1387 PyObject_HEAD
1388
1389 PyObject* root; /* root node (first created node) */
1390
1391 ElementObject* this; /* current node */
1392 ElementObject* last; /* most recently created node */
1393
1394 PyObject* data; /* data collector (string or list), or NULL */
1395
1396 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001397 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001398
1399 /* element tracing */
1400 PyObject* events; /* list of events, or NULL if not collecting */
1401 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1402 PyObject* end_event_obj;
1403 PyObject* start_ns_event_obj;
1404 PyObject* end_ns_event_obj;
1405
1406} TreeBuilderObject;
1407
Neal Norwitz227b5332006-03-22 09:28:35 +00001408static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001409
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001410#define TreeBuilder_CheckExact(op) (Py_Type(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001411
1412/* -------------------------------------------------------------------- */
1413/* constructor and destructor */
1414
1415LOCAL(PyObject*)
1416treebuilder_new(void)
1417{
1418 TreeBuilderObject* self;
1419
1420 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1421 if (self == NULL)
1422 return NULL;
1423
1424 self->root = NULL;
1425
1426 Py_INCREF(Py_None);
1427 self->this = (ElementObject*) Py_None;
1428
1429 Py_INCREF(Py_None);
1430 self->last = (ElementObject*) Py_None;
1431
1432 self->data = NULL;
1433
1434 self->stack = PyList_New(20);
1435 self->index = 0;
1436
1437 self->events = NULL;
1438 self->start_event_obj = self->end_event_obj = NULL;
1439 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1440
1441 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1442
1443 return (PyObject*) self;
1444}
1445
1446static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001447treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001448{
1449 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1450 return NULL;
1451
1452 return treebuilder_new();
1453}
1454
1455static void
1456treebuilder_dealloc(TreeBuilderObject* self)
1457{
1458 Py_XDECREF(self->end_ns_event_obj);
1459 Py_XDECREF(self->start_ns_event_obj);
1460 Py_XDECREF(self->end_event_obj);
1461 Py_XDECREF(self->start_event_obj);
1462 Py_XDECREF(self->events);
1463 Py_DECREF(self->stack);
1464 Py_XDECREF(self->data);
1465 Py_DECREF(self->last);
1466 Py_DECREF(self->this);
1467 Py_XDECREF(self->root);
1468
1469 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1470
1471 PyObject_Del(self);
1472}
1473
1474/* -------------------------------------------------------------------- */
1475/* handlers */
1476
1477LOCAL(PyObject*)
1478treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1479 PyObject* standalone)
1480{
1481 Py_RETURN_NONE;
1482}
1483
1484LOCAL(PyObject*)
1485treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1486 PyObject* attrib)
1487{
1488 PyObject* node;
1489 PyObject* this;
1490
1491 if (self->data) {
1492 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001493 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001494 self->last->text = JOIN_SET(
1495 self->data, PyList_CheckExact(self->data)
1496 );
1497 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001498 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001499 self->last->tail = JOIN_SET(
1500 self->data, PyList_CheckExact(self->data)
1501 );
1502 }
1503 self->data = NULL;
1504 }
1505
1506 node = element_new(tag, attrib);
1507 if (!node)
1508 return NULL;
1509
1510 this = (PyObject*) self->this;
1511
1512 if (this != Py_None) {
1513 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001514 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001515 } else {
1516 if (self->root) {
1517 PyErr_SetString(
1518 PyExc_SyntaxError,
1519 "multiple elements on top level"
1520 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001521 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001522 }
1523 Py_INCREF(node);
1524 self->root = node;
1525 }
1526
1527 if (self->index < PyList_GET_SIZE(self->stack)) {
1528 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001529 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001530 Py_INCREF(this);
1531 } else {
1532 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001533 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001534 }
1535 self->index++;
1536
1537 Py_DECREF(this);
1538 Py_INCREF(node);
1539 self->this = (ElementObject*) node;
1540
1541 Py_DECREF(self->last);
1542 Py_INCREF(node);
1543 self->last = (ElementObject*) node;
1544
1545 if (self->start_event_obj) {
1546 PyObject* res;
1547 PyObject* action = self->start_event_obj;
1548 res = PyTuple_New(2);
1549 if (res) {
1550 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1551 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1552 PyList_Append(self->events, res);
1553 Py_DECREF(res);
1554 } else
1555 PyErr_Clear(); /* FIXME: propagate error */
1556 }
1557
1558 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001559
1560 error:
1561 Py_DECREF(node);
1562 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001563}
1564
1565LOCAL(PyObject*)
1566treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1567{
1568 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001569 if (self->last == (ElementObject*) Py_None) {
1570 /* ignore calls to data before the first call to start */
1571 Py_RETURN_NONE;
1572 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001573 /* store the first item as is */
1574 Py_INCREF(data); self->data = data;
1575 } else {
1576 /* more than one item; use a list to collect items */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001577 if (PyString_CheckExact(self->data) && Py_Refcnt(self->data) == 1 &&
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001578 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1579 /* expat often generates single character data sections; handle
1580 the most common case by resizing the existing string... */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001581 Py_ssize_t size = PyString_GET_SIZE(self->data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001582 if (_PyString_Resize(&self->data, size + 1) < 0)
1583 return NULL;
1584 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1585 } else if (PyList_CheckExact(self->data)) {
1586 if (PyList_Append(self->data, data) < 0)
1587 return NULL;
1588 } else {
1589 PyObject* list = PyList_New(2);
1590 if (!list)
1591 return NULL;
1592 PyList_SET_ITEM(list, 0, self->data);
1593 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1594 self->data = list;
1595 }
1596 }
1597
1598 Py_RETURN_NONE;
1599}
1600
1601LOCAL(PyObject*)
1602treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1603{
1604 PyObject* item;
1605
1606 if (self->data) {
1607 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001608 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001609 self->last->text = JOIN_SET(
1610 self->data, PyList_CheckExact(self->data)
1611 );
1612 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001613 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001614 self->last->tail = JOIN_SET(
1615 self->data, PyList_CheckExact(self->data)
1616 );
1617 }
1618 self->data = NULL;
1619 }
1620
1621 if (self->index == 0) {
1622 PyErr_SetString(
1623 PyExc_IndexError,
1624 "pop from empty stack"
1625 );
1626 return NULL;
1627 }
1628
1629 self->index--;
1630
1631 item = PyList_GET_ITEM(self->stack, self->index);
1632 Py_INCREF(item);
1633
1634 Py_DECREF(self->last);
1635
1636 self->last = (ElementObject*) self->this;
1637 self->this = (ElementObject*) item;
1638
1639 if (self->end_event_obj) {
1640 PyObject* res;
1641 PyObject* action = self->end_event_obj;
1642 PyObject* node = (PyObject*) self->last;
1643 res = PyTuple_New(2);
1644 if (res) {
1645 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1646 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1647 PyList_Append(self->events, res);
1648 Py_DECREF(res);
1649 } else
1650 PyErr_Clear(); /* FIXME: propagate error */
1651 }
1652
1653 Py_INCREF(self->last);
1654 return (PyObject*) self->last;
1655}
1656
1657LOCAL(void)
1658treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1659 const char* prefix, const char *uri)
1660{
1661 PyObject* res;
1662 PyObject* action;
1663 PyObject* parcel;
1664
1665 if (!self->events)
1666 return;
1667
1668 if (start) {
1669 if (!self->start_ns_event_obj)
1670 return;
1671 action = self->start_ns_event_obj;
1672 /* FIXME: prefix and uri use utf-8 encoding! */
1673 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1674 if (!parcel)
1675 return;
1676 Py_INCREF(action);
1677 } else {
1678 if (!self->end_ns_event_obj)
1679 return;
1680 action = self->end_ns_event_obj;
1681 Py_INCREF(action);
1682 parcel = Py_None;
1683 Py_INCREF(parcel);
1684 }
1685
1686 res = PyTuple_New(2);
1687
1688 if (res) {
1689 PyTuple_SET_ITEM(res, 0, action);
1690 PyTuple_SET_ITEM(res, 1, parcel);
1691 PyList_Append(self->events, res);
1692 Py_DECREF(res);
1693 } else
1694 PyErr_Clear(); /* FIXME: propagate error */
1695}
1696
1697/* -------------------------------------------------------------------- */
1698/* methods (in alphabetical order) */
1699
1700static PyObject*
1701treebuilder_data(TreeBuilderObject* self, PyObject* args)
1702{
1703 PyObject* data;
1704 if (!PyArg_ParseTuple(args, "O:data", &data))
1705 return NULL;
1706
1707 return treebuilder_handle_data(self, data);
1708}
1709
1710static PyObject*
1711treebuilder_end(TreeBuilderObject* self, PyObject* args)
1712{
1713 PyObject* tag;
1714 if (!PyArg_ParseTuple(args, "O:end", &tag))
1715 return NULL;
1716
1717 return treebuilder_handle_end(self, tag);
1718}
1719
1720LOCAL(PyObject*)
1721treebuilder_done(TreeBuilderObject* self)
1722{
1723 PyObject* res;
1724
1725 /* FIXME: check stack size? */
1726
1727 if (self->root)
1728 res = self->root;
1729 else
1730 res = Py_None;
1731
1732 Py_INCREF(res);
1733 return res;
1734}
1735
1736static PyObject*
1737treebuilder_close(TreeBuilderObject* self, PyObject* args)
1738{
1739 if (!PyArg_ParseTuple(args, ":close"))
1740 return NULL;
1741
1742 return treebuilder_done(self);
1743}
1744
1745static PyObject*
1746treebuilder_start(TreeBuilderObject* self, PyObject* args)
1747{
1748 PyObject* tag;
1749 PyObject* attrib = Py_None;
1750 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1751 return NULL;
1752
1753 return treebuilder_handle_start(self, tag, attrib);
1754}
1755
1756static PyObject*
1757treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1758{
1759 PyObject* encoding;
1760 PyObject* standalone;
1761 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1762 return NULL;
1763
1764 return treebuilder_handle_xml(self, encoding, standalone);
1765}
1766
1767static PyMethodDef treebuilder_methods[] = {
1768 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1769 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1770 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1771 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1772 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1773 {NULL, NULL}
1774};
1775
1776static PyObject*
1777treebuilder_getattr(TreeBuilderObject* self, char* name)
1778{
1779 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1780}
1781
Neal Norwitz227b5332006-03-22 09:28:35 +00001782static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001783 PyVarObject_HEAD_INIT(NULL, 0)
1784 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001785 /* methods */
1786 (destructor)treebuilder_dealloc, /* tp_dealloc */
1787 0, /* tp_print */
1788 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1789};
1790
1791/* ==================================================================== */
1792/* the expat interface */
1793
1794#if defined(USE_EXPAT)
1795
1796#include "expat.h"
1797
/* EXPAT(func) names an expat entry point: either a member of the
   dispatch table obtained from pyexpat, or the XML_-prefixed library
   function itself */
#ifdef USE_PYEXPAT_CAPI
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
1805
1806typedef struct {
1807 PyObject_HEAD
1808
1809 XML_Parser parser;
1810
1811 PyObject* target;
1812 PyObject* entity;
1813
1814 PyObject* names;
1815
1816 PyObject* handle_xml;
1817 PyObject* handle_start;
1818 PyObject* handle_data;
1819 PyObject* handle_end;
1820
1821 PyObject* handle_comment;
1822 PyObject* handle_pi;
1823
1824} XMLParserObject;
1825
Neal Norwitz227b5332006-03-22 09:28:35 +00001826static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001827
1828/* helpers */
1829
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001830LOCAL(PyObject*)
1831makeuniversal(XMLParserObject* self, const char* string)
1832{
1833 /* convert a UTF-8 tag/attribute name from the expat parser
1834 to a universal name string */
1835
1836 int size = strlen(string);
1837 PyObject* key;
1838 PyObject* value;
1839
1840 /* look the 'raw' name up in the names dictionary */
1841 key = PyString_FromStringAndSize(string, size);
1842 if (!key)
1843 return NULL;
1844
1845 value = PyDict_GetItem(self->names, key);
1846
1847 if (value) {
1848 Py_INCREF(value);
1849 } else {
1850 /* new name. convert to universal name, and decode as
1851 necessary */
1852
1853 PyObject* tag;
1854 char* p;
1855 int i;
1856
1857 /* look for namespace separator */
1858 for (i = 0; i < size; i++)
1859 if (string[i] == '}')
1860 break;
1861 if (i != size) {
1862 /* convert to universal name */
1863 tag = PyString_FromStringAndSize(NULL, size+1);
1864 p = PyString_AS_STRING(tag);
1865 p[0] = '{';
1866 memcpy(p+1, string, size);
1867 size++;
1868 } else {
1869 /* plain name; use key as tag */
1870 Py_INCREF(key);
1871 tag = key;
1872 }
1873
1874 /* decode universal name */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001875 p = PyString_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00001876 value = PyUnicode_DecodeUTF8(p, size, "strict");
1877 Py_DECREF(tag);
1878 if (!value) {
1879 Py_DECREF(key);
1880 return NULL;
1881 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001882
1883 /* add to names dictionary */
1884 if (PyDict_SetItem(self->names, key, value) < 0) {
1885 Py_DECREF(key);
1886 Py_DECREF(value);
1887 return NULL;
1888 }
1889 }
1890
1891 Py_DECREF(key);
1892 return value;
1893}
1894
1895/* -------------------------------------------------------------------- */
1896/* handlers */
1897
1898static void
1899expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1900 int data_len)
1901{
1902 PyObject* key;
1903 PyObject* value;
1904 PyObject* res;
1905
1906 if (data_len < 2 || data_in[0] != '&')
1907 return;
1908
Neal Norwitz0269b912007-08-08 06:56:02 +00001909 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001910 if (!key)
1911 return;
1912
1913 value = PyDict_GetItem(self->entity, key);
1914
1915 if (value) {
1916 if (TreeBuilder_CheckExact(self->target))
1917 res = treebuilder_handle_data(
1918 (TreeBuilderObject*) self->target, value
1919 );
1920 else if (self->handle_data)
1921 res = PyObject_CallFunction(self->handle_data, "O", value);
1922 else
1923 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001924 Py_XDECREF(res);
1925 } else {
1926 PyErr_Format(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001927 PyExc_SyntaxError, "undefined entity &%s;: line %ld, column %ld",
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001928 PyString_AS_STRING(key),
1929 EXPAT(GetErrorLineNumber)(self->parser),
1930 EXPAT(GetErrorColumnNumber)(self->parser)
1931 );
1932 }
1933
1934 Py_DECREF(key);
1935}
1936
1937static void
1938expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
1939 const XML_Char **attrib_in)
1940{
1941 PyObject* res;
1942 PyObject* tag;
1943 PyObject* attrib;
1944 int ok;
1945
1946 /* tag name */
1947 tag = makeuniversal(self, tag_in);
1948 if (!tag)
1949 return; /* parser will look for errors */
1950
1951 /* attributes */
1952 if (attrib_in[0]) {
1953 attrib = PyDict_New();
1954 if (!attrib)
1955 return;
1956 while (attrib_in[0] && attrib_in[1]) {
1957 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00001958 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001959 if (!key || !value) {
1960 Py_XDECREF(value);
1961 Py_XDECREF(key);
1962 Py_DECREF(attrib);
1963 return;
1964 }
1965 ok = PyDict_SetItem(attrib, key, value);
1966 Py_DECREF(value);
1967 Py_DECREF(key);
1968 if (ok < 0) {
1969 Py_DECREF(attrib);
1970 return;
1971 }
1972 attrib_in += 2;
1973 }
1974 } else {
1975 Py_INCREF(Py_None);
1976 attrib = Py_None;
1977 }
1978
1979 if (TreeBuilder_CheckExact(self->target))
1980 /* shortcut */
1981 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
1982 tag, attrib);
1983 else if (self->handle_start)
1984 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
1985 else
1986 res = NULL;
1987
1988 Py_DECREF(tag);
1989 Py_DECREF(attrib);
1990
1991 Py_XDECREF(res);
1992}
1993
1994static void
1995expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
1996 int data_len)
1997{
1998 PyObject* data;
1999 PyObject* res;
2000
Neal Norwitz0269b912007-08-08 06:56:02 +00002001 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002002 if (!data)
2003 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002004
2005 if (TreeBuilder_CheckExact(self->target))
2006 /* shortcut */
2007 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2008 else if (self->handle_data)
2009 res = PyObject_CallFunction(self->handle_data, "O", data);
2010 else
2011 res = NULL;
2012
2013 Py_DECREF(data);
2014
2015 Py_XDECREF(res);
2016}
2017
2018static void
2019expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2020{
2021 PyObject* tag;
2022 PyObject* res = NULL;
2023
2024 if (TreeBuilder_CheckExact(self->target))
2025 /* shortcut */
2026 /* the standard tree builder doesn't look at the end tag */
2027 res = treebuilder_handle_end(
2028 (TreeBuilderObject*) self->target, Py_None
2029 );
2030 else if (self->handle_end) {
2031 tag = makeuniversal(self, tag_in);
2032 if (tag) {
2033 res = PyObject_CallFunction(self->handle_end, "O", tag);
2034 Py_DECREF(tag);
2035 }
2036 }
2037
2038 Py_XDECREF(res);
2039}
2040
2041static void
2042expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2043 const XML_Char *uri)
2044{
2045 treebuilder_handle_namespace(
2046 (TreeBuilderObject*) self->target, 1, prefix, uri
2047 );
2048}
2049
2050static void
2051expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2052{
2053 treebuilder_handle_namespace(
2054 (TreeBuilderObject*) self->target, 0, NULL, NULL
2055 );
2056}
2057
2058static void
2059expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2060{
2061 PyObject* comment;
2062 PyObject* res;
2063
2064 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002065 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002066 if (comment) {
2067 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2068 Py_XDECREF(res);
2069 Py_DECREF(comment);
2070 }
2071 }
2072}
2073
2074static void
2075expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2076 const XML_Char* data_in)
2077{
2078 PyObject* target;
2079 PyObject* data;
2080 PyObject* res;
2081
2082 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002083 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2084 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002085 if (target && data) {
2086 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2087 Py_XDECREF(res);
2088 Py_DECREF(data);
2089 Py_DECREF(target);
2090 } else {
2091 Py_XDECREF(data);
2092 Py_XDECREF(target);
2093 }
2094 }
2095}
2096
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002097static int
2098expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2099 XML_Encoding *info)
2100{
2101 PyObject* u;
2102 Py_UNICODE* p;
2103 unsigned char s[256];
2104 int i;
2105
2106 memset(info, 0, sizeof(XML_Encoding));
2107
2108 for (i = 0; i < 256; i++)
2109 s[i] = i;
2110
Fredrik Lundhc3389992005-12-25 11:40:19 +00002111 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002112 if (!u)
2113 return XML_STATUS_ERROR;
2114
2115 if (PyUnicode_GET_SIZE(u) != 256) {
2116 Py_DECREF(u);
2117 return XML_STATUS_ERROR;
2118 }
2119
2120 p = PyUnicode_AS_UNICODE(u);
2121
2122 for (i = 0; i < 256; i++) {
2123 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2124 info->map[i] = p[i];
2125 else
2126 info->map[i] = -1;
2127 }
2128
2129 Py_DECREF(u);
2130
2131 return XML_STATUS_OK;
2132}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002133
2134/* -------------------------------------------------------------------- */
2135/* constructor and destructor */
2136
2137static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002138xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002139{
2140 XMLParserObject* self;
2141 /* FIXME: does this need to be static? */
2142 static XML_Memory_Handling_Suite memory_handler;
2143
2144 PyObject* target = NULL;
2145 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002146 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002147 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2148 &target, &encoding))
2149 return NULL;
2150
2151#if defined(USE_PYEXPAT_CAPI)
2152 if (!expat_capi) {
2153 PyErr_SetString(
2154 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2155 );
2156 return NULL;
2157 }
2158#endif
2159
2160 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2161 if (self == NULL)
2162 return NULL;
2163
2164 self->entity = PyDict_New();
2165 if (!self->entity) {
2166 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002167 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002168 }
2169
2170 self->names = PyDict_New();
2171 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002172 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002173 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002174 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002175 }
2176
2177 memory_handler.malloc_fcn = PyObject_Malloc;
2178 memory_handler.realloc_fcn = PyObject_Realloc;
2179 memory_handler.free_fcn = PyObject_Free;
2180
2181 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2182 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002183 PyObject_Del(self->names);
2184 PyObject_Del(self->entity);
2185 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002186 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002187 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002188 }
2189
2190 /* setup target handlers */
2191 if (!target) {
2192 target = treebuilder_new();
2193 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002194 EXPAT(ParserFree)(self->parser);
2195 PyObject_Del(self->names);
2196 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002197 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002198 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002199 }
2200 } else
2201 Py_INCREF(target);
2202 self->target = target;
2203
2204 self->handle_xml = PyObject_GetAttrString(target, "xml");
2205 self->handle_start = PyObject_GetAttrString(target, "start");
2206 self->handle_data = PyObject_GetAttrString(target, "data");
2207 self->handle_end = PyObject_GetAttrString(target, "end");
2208 self->handle_comment = PyObject_GetAttrString(target, "comment");
2209 self->handle_pi = PyObject_GetAttrString(target, "pi");
2210
2211 PyErr_Clear();
2212
2213 /* configure parser */
2214 EXPAT(SetUserData)(self->parser, self);
2215 EXPAT(SetElementHandler)(
2216 self->parser,
2217 (XML_StartElementHandler) expat_start_handler,
2218 (XML_EndElementHandler) expat_end_handler
2219 );
2220 EXPAT(SetDefaultHandlerExpand)(
2221 self->parser,
2222 (XML_DefaultHandler) expat_default_handler
2223 );
2224 EXPAT(SetCharacterDataHandler)(
2225 self->parser,
2226 (XML_CharacterDataHandler) expat_data_handler
2227 );
2228 if (self->handle_comment)
2229 EXPAT(SetCommentHandler)(
2230 self->parser,
2231 (XML_CommentHandler) expat_comment_handler
2232 );
2233 if (self->handle_pi)
2234 EXPAT(SetProcessingInstructionHandler)(
2235 self->parser,
2236 (XML_ProcessingInstructionHandler) expat_pi_handler
2237 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002238 EXPAT(SetUnknownEncodingHandler)(
2239 self->parser,
2240 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2241 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002242
2243 ALLOC(sizeof(XMLParserObject), "create expatparser");
2244
2245 return (PyObject*) self;
2246}
2247
2248static void
2249xmlparser_dealloc(XMLParserObject* self)
2250{
2251 EXPAT(ParserFree)(self->parser);
2252
2253 Py_XDECREF(self->handle_pi);
2254 Py_XDECREF(self->handle_comment);
2255 Py_XDECREF(self->handle_end);
2256 Py_XDECREF(self->handle_data);
2257 Py_XDECREF(self->handle_start);
2258 Py_XDECREF(self->handle_xml);
2259
2260 Py_DECREF(self->target);
2261 Py_DECREF(self->entity);
2262 Py_DECREF(self->names);
2263
2264 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2265
2266 PyObject_Del(self);
2267}
2268
2269/* -------------------------------------------------------------------- */
2270/* methods (in alphabetical order) */
2271
2272LOCAL(PyObject*)
2273expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2274{
2275 int ok;
2276
2277 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2278
2279 if (PyErr_Occurred())
2280 return NULL;
2281
2282 if (!ok) {
2283 PyErr_Format(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002284 PyExc_SyntaxError, "%s: line %ld, column %ld",
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002285 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2286 EXPAT(GetErrorLineNumber)(self->parser),
2287 EXPAT(GetErrorColumnNumber)(self->parser)
2288 );
2289 return NULL;
2290 }
2291
2292 Py_RETURN_NONE;
2293}
2294
2295static PyObject*
2296xmlparser_close(XMLParserObject* self, PyObject* args)
2297{
2298 /* end feeding data to parser */
2299
2300 PyObject* res;
2301 if (!PyArg_ParseTuple(args, ":close"))
2302 return NULL;
2303
2304 res = expat_parse(self, "", 0, 1);
2305
2306 if (res && TreeBuilder_CheckExact(self->target)) {
2307 Py_DECREF(res);
2308 return treebuilder_done((TreeBuilderObject*) self->target);
2309 }
2310
2311 return res;
2312}
2313
2314static PyObject*
2315xmlparser_feed(XMLParserObject* self, PyObject* args)
2316{
2317 /* feed data to parser */
2318
2319 char* data;
2320 int data_len;
2321 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2322 return NULL;
2323
2324 return expat_parse(self, data, data_len, 0);
2325}
2326
2327static PyObject*
2328xmlparser_parse(XMLParserObject* self, PyObject* args)
2329{
2330 /* (internal) parse until end of input stream */
2331
2332 PyObject* reader;
2333 PyObject* buffer;
2334 PyObject* res;
2335
2336 PyObject* fileobj;
2337 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2338 return NULL;
2339
2340 reader = PyObject_GetAttrString(fileobj, "read");
2341 if (!reader)
2342 return NULL;
2343
2344 /* read from open file object */
2345 for (;;) {
2346
2347 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2348
2349 if (!buffer) {
2350 /* read failed (e.g. due to KeyboardInterrupt) */
2351 Py_DECREF(reader);
2352 return NULL;
2353 }
2354
2355 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2356 Py_DECREF(buffer);
2357 break;
2358 }
2359
2360 res = expat_parse(
2361 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2362 );
2363
2364 Py_DECREF(buffer);
2365
2366 if (!res) {
2367 Py_DECREF(reader);
2368 return NULL;
2369 }
2370 Py_DECREF(res);
2371
2372 }
2373
2374 Py_DECREF(reader);
2375
2376 res = expat_parse(self, "", 0, 1);
2377
2378 if (res && TreeBuilder_CheckExact(self->target)) {
2379 Py_DECREF(res);
2380 return treebuilder_done((TreeBuilderObject*) self->target);
2381 }
2382
2383 return res;
2384}
2385
2386static PyObject*
2387xmlparser_setevents(XMLParserObject* self, PyObject* args)
2388{
2389 /* activate element event reporting */
2390
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002391 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002392 TreeBuilderObject* target;
2393
2394 PyObject* events; /* event collector */
2395 PyObject* event_set = Py_None;
2396 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2397 &event_set))
2398 return NULL;
2399
2400 if (!TreeBuilder_CheckExact(self->target)) {
2401 PyErr_SetString(
2402 PyExc_TypeError,
2403 "event handling only supported for cElementTree.Treebuilder "
2404 "targets"
2405 );
2406 return NULL;
2407 }
2408
2409 target = (TreeBuilderObject*) self->target;
2410
2411 Py_INCREF(events);
2412 Py_XDECREF(target->events);
2413 target->events = events;
2414
2415 /* clear out existing events */
2416 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2417 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2418 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2419 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2420
2421 if (event_set == Py_None) {
2422 /* default is "end" only */
2423 target->end_event_obj = PyString_FromString("end");
2424 Py_RETURN_NONE;
2425 }
2426
2427 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2428 goto error;
2429
2430 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2431 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2432 char* event;
2433 if (!PyString_Check(item))
2434 goto error;
2435 event = PyString_AS_STRING(item);
2436 if (strcmp(event, "start") == 0) {
2437 Py_INCREF(item);
2438 target->start_event_obj = item;
2439 } else if (strcmp(event, "end") == 0) {
2440 Py_INCREF(item);
2441 Py_XDECREF(target->end_event_obj);
2442 target->end_event_obj = item;
2443 } else if (strcmp(event, "start-ns") == 0) {
2444 Py_INCREF(item);
2445 Py_XDECREF(target->start_ns_event_obj);
2446 target->start_ns_event_obj = item;
2447 EXPAT(SetNamespaceDeclHandler)(
2448 self->parser,
2449 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2450 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2451 );
2452 } else if (strcmp(event, "end-ns") == 0) {
2453 Py_INCREF(item);
2454 Py_XDECREF(target->end_ns_event_obj);
2455 target->end_ns_event_obj = item;
2456 EXPAT(SetNamespaceDeclHandler)(
2457 self->parser,
2458 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2459 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2460 );
2461 } else {
2462 PyErr_Format(
2463 PyExc_ValueError,
2464 "unknown event '%s'", event
2465 );
2466 return NULL;
2467 }
2468 }
2469
2470 Py_RETURN_NONE;
2471
2472 error:
2473 PyErr_SetString(
2474 PyExc_TypeError,
2475 "invalid event tuple"
2476 );
2477 return NULL;
2478}
2479
2480static PyMethodDef xmlparser_methods[] = {
2481 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2482 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2483 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2484 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2485 {NULL, NULL}
2486};
2487
2488static PyObject*
2489xmlparser_getattr(XMLParserObject* self, char* name)
2490{
2491 PyObject* res;
2492
2493 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2494 if (res)
2495 return res;
2496
2497 PyErr_Clear();
2498
2499 if (strcmp(name, "entity") == 0)
2500 res = self->entity;
2501 else if (strcmp(name, "target") == 0)
2502 res = self->target;
2503 else if (strcmp(name, "version") == 0) {
2504 char buffer[100];
2505 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2506 XML_MINOR_VERSION, XML_MICRO_VERSION);
2507 return PyString_FromString(buffer);
2508 } else {
2509 PyErr_SetString(PyExc_AttributeError, name);
2510 return NULL;
2511 }
2512
2513 Py_INCREF(res);
2514 return res;
2515}
2516
Neal Norwitz227b5332006-03-22 09:28:35 +00002517static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002518 PyVarObject_HEAD_INIT(NULL, 0)
2519 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002520 /* methods */
2521 (destructor)xmlparser_dealloc, /* tp_dealloc */
2522 0, /* tp_print */
2523 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2524};
2525
2526#endif
2527
2528/* ==================================================================== */
2529/* python module interface */
2530
2531static PyMethodDef _functions[] = {
2532 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2533 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2534 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2535#if defined(USE_EXPAT)
2536 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2537 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2538#endif
2539 {NULL, NULL}
2540};
2541
Neal Norwitzf6657e62006-12-28 04:47:50 +00002542PyMODINIT_FUNC
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002543init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544{
2545 PyObject* m;
2546 PyObject* g;
2547 char* bootstrap;
2548#if defined(USE_PYEXPAT_CAPI)
2549 struct PyExpat_CAPI* capi;
2550#endif
2551
2552 /* Patch object type */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002553 Py_Type(&Element_Type) = Py_Type(&TreeBuilder_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002554#if defined(USE_EXPAT)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002555 Py_Type(&XMLParser_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002556#endif
2557
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002558 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002559 if (!m)
2560 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002561
2562 /* python glue code */
2563
2564 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002565 if (!g)
2566 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002567
2568 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2569
2570 bootstrap = (
2571
2572#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2573 "from __future__ import generators\n" /* enable yield under 2.2 */
2574#endif
2575
2576 "from copy import copy, deepcopy\n"
2577
2578 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002579 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002580 "except ImportError:\n"
2581 " import ElementTree\n"
2582 "ET = ElementTree\n"
2583 "del ElementTree\n"
2584
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002585 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002586
2587 "try:\n" /* check if copy works as is */
2588 " copy(cElementTree.Element('x'))\n"
2589 "except:\n"
2590 " def copyelement(elem):\n"
2591 " return elem\n"
2592
2593 "def Comment(text=None):\n" /* public */
2594 " element = cElementTree.Element(ET.Comment)\n"
2595 " element.text = text\n"
2596 " return element\n"
2597 "cElementTree.Comment = Comment\n"
2598
2599 "class ElementTree(ET.ElementTree):\n" /* public */
2600 " def parse(self, source, parser=None):\n"
2601 " if not hasattr(source, 'read'):\n"
2602 " source = open(source, 'rb')\n"
2603 " if parser is not None:\n"
2604 " while 1:\n"
2605 " data = source.read(65536)\n"
2606 " if not data:\n"
2607 " break\n"
2608 " parser.feed(data)\n"
2609 " self._root = parser.close()\n"
2610 " else:\n"
2611 " parser = cElementTree.XMLParser()\n"
2612 " self._root = parser._parse(source)\n"
2613 " return self._root\n"
2614 "cElementTree.ElementTree = ElementTree\n"
2615
2616 "def getiterator(node, tag=None):\n" /* helper */
2617 " if tag == '*':\n"
2618 " tag = None\n"
2619#if (PY_VERSION_HEX < 0x02020000)
2620 " nodes = []\n" /* 2.1 doesn't have yield */
2621 " if tag is None or node.tag == tag:\n"
2622 " nodes.append(node)\n"
2623 " for node in node:\n"
2624 " nodes.extend(getiterator(node, tag))\n"
2625 " return nodes\n"
2626#else
2627 " if tag is None or node.tag == tag:\n"
2628 " yield node\n"
2629 " for node in node:\n"
2630 " for node in getiterator(node, tag):\n"
2631 " yield node\n"
2632#endif
2633
2634 "def parse(source, parser=None):\n" /* public */
2635 " tree = ElementTree()\n"
2636 " tree.parse(source, parser)\n"
2637 " return tree\n"
2638 "cElementTree.parse = parse\n"
2639
2640#if (PY_VERSION_HEX < 0x02020000)
2641 "if hasattr(ET, 'iterparse'):\n"
2642 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2643#else
2644 "class iterparse(object):\n"
2645 " root = None\n"
2646 " def __init__(self, file, events=None):\n"
2647 " if not hasattr(file, 'read'):\n"
2648 " file = open(file, 'rb')\n"
2649 " self._file = file\n"
2650 " self._events = events\n"
2651 " def __iter__(self):\n"
2652 " events = []\n"
2653 " b = cElementTree.TreeBuilder()\n"
2654 " p = cElementTree.XMLParser(b)\n"
2655 " p._setevents(events, self._events)\n"
2656 " while 1:\n"
2657 " data = self._file.read(16384)\n"
2658 " if not data:\n"
2659 " break\n"
2660 " p.feed(data)\n"
2661 " for event in events:\n"
2662 " yield event\n"
2663 " del events[:]\n"
2664 " root = p.close()\n"
2665 " for event in events:\n"
2666 " yield event\n"
2667 " self.root = root\n"
2668 "cElementTree.iterparse = iterparse\n"
2669#endif
2670
2671 "def PI(target, text=None):\n" /* public */
2672 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2673 " element.text = target\n"
2674 " if text:\n"
2675 " element.text = element.text + ' ' + text\n"
2676 " return element\n"
2677
2678 " elem = cElementTree.Element(ET.PI)\n"
2679 " elem.text = text\n"
2680 " return elem\n"
2681 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2682
2683 "def XML(text):\n" /* public */
2684 " parser = cElementTree.XMLParser()\n"
2685 " parser.feed(text)\n"
2686 " return parser.close()\n"
2687 "cElementTree.XML = cElementTree.fromstring = XML\n"
2688
2689 "def XMLID(text):\n" /* public */
2690 " tree = XML(text)\n"
2691 " ids = {}\n"
2692 " for elem in tree.getiterator():\n"
2693 " id = elem.get('id')\n"
2694 " if id:\n"
2695 " ids[id] = elem\n"
2696 " return tree, ids\n"
2697 "cElementTree.XMLID = XMLID\n"
2698
2699 "cElementTree.dump = ET.dump\n"
2700 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2701 "cElementTree.iselement = ET.iselement\n"
2702 "cElementTree.QName = ET.QName\n"
2703 "cElementTree.tostring = ET.tostring\n"
2704 "cElementTree.VERSION = '" VERSION "'\n"
2705 "cElementTree.__version__ = '" VERSION "'\n"
2706 "cElementTree.XMLParserError = SyntaxError\n"
2707
2708 );
2709
2710 PyRun_String(bootstrap, Py_file_input, g, NULL);
2711
2712 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2713
2714 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2715 if (elementtree_copyelement_obj) {
2716 /* reduce hack needed; enable reduce method */
2717 PyMethodDef* mp;
2718 for (mp = element_methods; mp->ml_name; mp++)
2719 if (mp->ml_meth == (PyCFunction) element_reduce) {
2720 mp->ml_name = "__reduce__";
2721 break;
2722 }
2723 } else
2724 PyErr_Clear();
2725 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2726 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2727
2728#if defined(USE_PYEXPAT_CAPI)
2729 /* link against pyexpat, if possible */
2730 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2731 if (capi &&
2732 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2733 capi->size <= sizeof(*expat_capi) &&
2734 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2735 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2736 capi->MICRO_VERSION == XML_MICRO_VERSION)
2737 expat_capi = capi;
2738 else
2739 expat_capi = NULL;
2740#endif
2741
2742}