blob: 871aa186c255d7f186dde07de4d88c5baa35bb09 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000036 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000038 *
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000039 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000041 *
42 * info@pythonware.com
43 * http://www.pythonware.com
44 */
45
Fredrik Lundh6d52b552005-12-16 22:06:43 +000046/* Licensed to PSF under a Contributor Agreement. */
47/* See http://www.python.org/2.4/license for licensing details. */
48
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000049#include "Python.h"
50
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000051#define VERSION "1.0.6-snapshot"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
53/* -------------------------------------------------------------------- */
54/* configuration */
55
56/* Leave defined to include the expat-based XMLParser type */
57#define USE_EXPAT
58
/* Define to route all expat calls via pyexpat's embedded expat library */
60/* #define USE_PYEXPAT_CAPI */
61
62/* An element can hold this many children without extra memory
63 allocations. */
64#define STATIC_CHILDREN 4
65
/* For best performance, choose a value so that 80-90% of all nodes
67 have no more than the given number of children. Set this to zero
68 to minimize the size of the element structure itself (this only
69 helps if you have lots of leaf nodes with attributes). */
70
71/* Also note that pymalloc always allocates blocks in multiples of
72 eight bytes. For the current version of cElementTree, this means
73 that the number of children should be an even number, at least on
74 32-bit platforms. */
75
76/* -------------------------------------------------------------------- */
77
78#if 0
79static int memory = 0;
80#define ALLOC(size, comment)\
81do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
82#define RELEASE(size, comment)\
83do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
84#else
85#define ALLOC(size, comment)
86#define RELEASE(size, comment)
87#endif
88
89/* compiler tweaks */
90#if defined(_MSC_VER)
91#define LOCAL(type) static __inline type __fastcall
92#else
93#define LOCAL(type) static type
94#endif
95
96/* compatibility macros */
Martin v. Löwis18e16552006-02-15 17:27:45 +000097#if (PY_VERSION_HEX < 0x02050000)
98typedef int Py_ssize_t;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000099#define lenfunc inquiry
Martin v. Löwis18e16552006-02-15 17:27:45 +0000100#endif
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000101
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000102#if (PY_VERSION_HEX < 0x02040000)
103#define PyDict_CheckExact PyDict_Check
104#if (PY_VERSION_HEX < 0x02020000)
105#define PyList_CheckExact PyList_Check
106#define PyString_CheckExact PyString_Check
107#if (PY_VERSION_HEX >= 0x01060000)
108#define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
109#endif
110#endif
111#endif
112
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000113#if !defined(Py_RETURN_NONE)
114#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
115#endif
116
117/* macros used to store 'join' flags in string object pointers. note
118 that all use of text and tail as object pointers must be wrapped in
119 JOIN_OBJ. see comments in the ElementObject definition for more
120 info. */
121#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
122#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
123#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
124
125/* glue functions (see the init function for details) */
126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
128static PyObject* elementtree_getiterator_obj;
129static PyObject* elementpath_obj;
130
131/* helpers */
132
133LOCAL(PyObject*)
134deepcopy(PyObject* object, PyObject* memo)
135{
136 /* do a deep copy of the given object */
137
138 PyObject* args;
139 PyObject* result;
140
141 if (!elementtree_deepcopy_obj) {
142 PyErr_SetString(
143 PyExc_RuntimeError,
144 "deepcopy helper not found"
145 );
146 return NULL;
147 }
148
149 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000150 if (!args)
151 return NULL;
152
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000153 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
154 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
155
156 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
157
158 Py_DECREF(args);
159
160 return result;
161}
162
163LOCAL(PyObject*)
164list_join(PyObject* list)
165{
166 /* join list elements (destroying the list in the process) */
167
168 PyObject* joiner;
169 PyObject* function;
170 PyObject* args;
171 PyObject* result;
172
173 switch (PyList_GET_SIZE(list)) {
174 case 0:
175 Py_DECREF(list);
176 return PyString_FromString("");
177 case 1:
178 result = PyList_GET_ITEM(list, 0);
179 Py_INCREF(result);
180 Py_DECREF(list);
181 return result;
182 }
183
184 /* two or more elements: slice out a suitable separator from the
185 first member, and use that to join the entire list */
186
187 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
188 if (!joiner)
189 return NULL;
190
191 function = PyObject_GetAttrString(joiner, "join");
192 if (!function) {
193 Py_DECREF(joiner);
194 return NULL;
195 }
196
197 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000198 if (!args)
199 return NULL;
200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000201 PyTuple_SET_ITEM(args, 0, list);
202
203 result = PyObject_CallObject(function, args);
204
205 Py_DECREF(args); /* also removes list */
206 Py_DECREF(function);
207 Py_DECREF(joiner);
208
209 return result;
210}
211
212#if (PY_VERSION_HEX < 0x02020000)
213LOCAL(int)
214PyDict_Update(PyObject* dict, PyObject* other)
215{
216 /* PyDict_Update emulation for 2.1 and earlier */
217
218 PyObject* res;
219
220 res = PyObject_CallMethod(dict, "update", "O", other);
221 if (!res)
222 return -1;
223
224 Py_DECREF(res);
225 return 0;
226}
227#endif
228
229/* -------------------------------------------------------------------- */
230/* the element type */
231
232typedef struct {
233
234 /* attributes (a dictionary object), or None if no attributes */
235 PyObject* attrib;
236
237 /* child elements */
238 int length; /* actual number of items */
239 int allocated; /* allocated items */
240
241 /* this either points to _children or to a malloced buffer */
242 PyObject* *children;
243
244 PyObject* _children[STATIC_CHILDREN];
245
246} ElementObjectExtra;
247
248typedef struct {
249 PyObject_HEAD
250
251 /* element tag (a string). */
252 PyObject* tag;
253
254 /* text before first child. note that this is a tagged pointer;
255 use JOIN_OBJ to get the object pointer. the join flag is used
256 to distinguish lists created by the tree builder from lists
257 assigned to the attribute by application code; the former
258 should be joined before being returned to the user, the latter
259 should be left intact. */
260 PyObject* text;
261
262 /* text after this element, in parent. note that this is a tagged
263 pointer; use JOIN_OBJ to get the object pointer. */
264 PyObject* tail;
265
266 ElementObjectExtra* extra;
267
268} ElementObject;
269
Neal Norwitz227b5332006-03-22 09:28:35 +0000270static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271
272#define Element_CheckExact(op) ((op)->ob_type == &Element_Type)
273
274/* -------------------------------------------------------------------- */
275/* element constructor and destructor */
276
277LOCAL(int)
278element_new_extra(ElementObject* self, PyObject* attrib)
279{
280 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
281 if (!self->extra)
282 return -1;
283
284 if (!attrib)
285 attrib = Py_None;
286
287 Py_INCREF(attrib);
288 self->extra->attrib = attrib;
289
290 self->extra->length = 0;
291 self->extra->allocated = STATIC_CHILDREN;
292 self->extra->children = self->extra->_children;
293
294 return 0;
295}
296
297LOCAL(void)
298element_dealloc_extra(ElementObject* self)
299{
300 int i;
301
302 Py_DECREF(self->extra->attrib);
303
304 for (i = 0; i < self->extra->length; i++)
305 Py_DECREF(self->extra->children[i]);
306
307 if (self->extra->children != self->extra->_children)
308 PyObject_Free(self->extra->children);
309
310 PyObject_Free(self->extra);
311}
312
313LOCAL(PyObject*)
314element_new(PyObject* tag, PyObject* attrib)
315{
316 ElementObject* self;
317
318 self = PyObject_New(ElementObject, &Element_Type);
319 if (self == NULL)
320 return NULL;
321
322 /* use None for empty dictionaries */
323 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
324 attrib = Py_None;
325
326 self->extra = NULL;
327
328 if (attrib != Py_None) {
329
Thomas Wouters477c8d52006-05-27 19:21:47 +0000330 if (element_new_extra(self, attrib) < 0) {
331 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000332 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000333 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000334
335 self->extra->length = 0;
336 self->extra->allocated = STATIC_CHILDREN;
337 self->extra->children = self->extra->_children;
338
339 }
340
341 Py_INCREF(tag);
342 self->tag = tag;
343
344 Py_INCREF(Py_None);
345 self->text = Py_None;
346
347 Py_INCREF(Py_None);
348 self->tail = Py_None;
349
350 ALLOC(sizeof(ElementObject), "create element");
351
352 return (PyObject*) self;
353}
354
355LOCAL(int)
356element_resize(ElementObject* self, int extra)
357{
358 int size;
359 PyObject* *children;
360
361 /* make sure self->children can hold the given number of extra
362 elements. set an exception and return -1 if allocation failed */
363
364 if (!self->extra)
365 element_new_extra(self, NULL);
366
367 size = self->extra->length + extra;
368
369 if (size > self->extra->allocated) {
370 /* use Python 2.4's list growth strategy */
371 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
372 if (self->extra->children != self->extra->_children) {
373 children = PyObject_Realloc(self->extra->children,
374 size * sizeof(PyObject*));
375 if (!children)
376 goto nomemory;
377 } else {
378 children = PyObject_Malloc(size * sizeof(PyObject*));
379 if (!children)
380 goto nomemory;
381 /* copy existing children from static area to malloc buffer */
382 memcpy(children, self->extra->children,
383 self->extra->length * sizeof(PyObject*));
384 }
385 self->extra->children = children;
386 self->extra->allocated = size;
387 }
388
389 return 0;
390
391 nomemory:
392 PyErr_NoMemory();
393 return -1;
394}
395
396LOCAL(int)
397element_add_subelement(ElementObject* self, PyObject* element)
398{
399 /* add a child element to a parent */
400
401 if (element_resize(self, 1) < 0)
402 return -1;
403
404 Py_INCREF(element);
405 self->extra->children[self->extra->length] = element;
406
407 self->extra->length++;
408
409 return 0;
410}
411
412LOCAL(PyObject*)
413element_get_attrib(ElementObject* self)
414{
415 /* return borrowed reference to attrib dictionary */
416 /* note: this function assumes that the extra section exists */
417
418 PyObject* res = self->extra->attrib;
419
420 if (res == Py_None) {
421 /* create missing dictionary */
422 res = PyDict_New();
423 if (!res)
424 return NULL;
425 self->extra->attrib = res;
426 }
427
428 return res;
429}
430
431LOCAL(PyObject*)
432element_get_text(ElementObject* self)
433{
434 /* return borrowed reference to text attribute */
435
436 PyObject* res = self->text;
437
438 if (JOIN_GET(res)) {
439 res = JOIN_OBJ(res);
440 if (PyList_CheckExact(res)) {
441 res = list_join(res);
442 if (!res)
443 return NULL;
444 self->text = res;
445 }
446 }
447
448 return res;
449}
450
451LOCAL(PyObject*)
452element_get_tail(ElementObject* self)
453{
454 /* return borrowed reference to text attribute */
455
456 PyObject* res = self->tail;
457
458 if (JOIN_GET(res)) {
459 res = JOIN_OBJ(res);
460 if (PyList_CheckExact(res)) {
461 res = list_join(res);
462 if (!res)
463 return NULL;
464 self->tail = res;
465 }
466 }
467
468 return res;
469}
470
471static PyObject*
472element(PyObject* self, PyObject* args, PyObject* kw)
473{
474 PyObject* elem;
475
476 PyObject* tag;
477 PyObject* attrib = NULL;
478 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
479 &PyDict_Type, &attrib))
480 return NULL;
481
482 if (attrib || kw) {
483 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
484 if (!attrib)
485 return NULL;
486 if (kw)
487 PyDict_Update(attrib, kw);
488 } else {
489 Py_INCREF(Py_None);
490 attrib = Py_None;
491 }
492
493 elem = element_new(tag, attrib);
494
495 Py_DECREF(attrib);
496
497 return elem;
498}
499
500static PyObject*
501subelement(PyObject* self, PyObject* args, PyObject* kw)
502{
503 PyObject* elem;
504
505 ElementObject* parent;
506 PyObject* tag;
507 PyObject* attrib = NULL;
508 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
509 &Element_Type, &parent, &tag,
510 &PyDict_Type, &attrib))
511 return NULL;
512
513 if (attrib || kw) {
514 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
515 if (!attrib)
516 return NULL;
517 if (kw)
518 PyDict_Update(attrib, kw);
519 } else {
520 Py_INCREF(Py_None);
521 attrib = Py_None;
522 }
523
524 elem = element_new(tag, attrib);
525
526 Py_DECREF(attrib);
527
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000528 if (element_add_subelement(parent, elem) < 0) {
529 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000530 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000531 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000532
533 return elem;
534}
535
536static void
537element_dealloc(ElementObject* self)
538{
539 if (self->extra)
540 element_dealloc_extra(self);
541
542 /* discard attributes */
543 Py_DECREF(self->tag);
544 Py_DECREF(JOIN_OBJ(self->text));
545 Py_DECREF(JOIN_OBJ(self->tail));
546
547 RELEASE(sizeof(ElementObject), "destroy element");
548
549 PyObject_Del(self);
550}
551
552/* -------------------------------------------------------------------- */
553/* methods (in alphabetical order) */
554
555static PyObject*
556element_append(ElementObject* self, PyObject* args)
557{
558 PyObject* element;
559 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
560 return NULL;
561
562 if (element_add_subelement(self, element) < 0)
563 return NULL;
564
565 Py_RETURN_NONE;
566}
567
568static PyObject*
569element_clear(ElementObject* self, PyObject* args)
570{
571 if (!PyArg_ParseTuple(args, ":clear"))
572 return NULL;
573
574 if (self->extra) {
575 element_dealloc_extra(self);
576 self->extra = NULL;
577 }
578
579 Py_INCREF(Py_None);
580 Py_DECREF(JOIN_OBJ(self->text));
581 self->text = Py_None;
582
583 Py_INCREF(Py_None);
584 Py_DECREF(JOIN_OBJ(self->tail));
585 self->tail = Py_None;
586
587 Py_RETURN_NONE;
588}
589
590static PyObject*
591element_copy(ElementObject* self, PyObject* args)
592{
593 int i;
594 ElementObject* element;
595
596 if (!PyArg_ParseTuple(args, ":__copy__"))
597 return NULL;
598
599 element = (ElementObject*) element_new(
600 self->tag, (self->extra) ? self->extra->attrib : Py_None
601 );
602 if (!element)
603 return NULL;
604
605 Py_DECREF(JOIN_OBJ(element->text));
606 element->text = self->text;
607 Py_INCREF(JOIN_OBJ(element->text));
608
609 Py_DECREF(JOIN_OBJ(element->tail));
610 element->tail = self->tail;
611 Py_INCREF(JOIN_OBJ(element->tail));
612
613 if (self->extra) {
614
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000615 if (element_resize(element, self->extra->length) < 0) {
616 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000617 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000618 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000619
620 for (i = 0; i < self->extra->length; i++) {
621 Py_INCREF(self->extra->children[i]);
622 element->extra->children[i] = self->extra->children[i];
623 }
624
625 element->extra->length = self->extra->length;
626
627 }
628
629 return (PyObject*) element;
630}
631
632static PyObject*
633element_deepcopy(ElementObject* self, PyObject* args)
634{
635 int i;
636 ElementObject* element;
637 PyObject* tag;
638 PyObject* attrib;
639 PyObject* text;
640 PyObject* tail;
641 PyObject* id;
642
643 PyObject* memo;
644 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
645 return NULL;
646
647 tag = deepcopy(self->tag, memo);
648 if (!tag)
649 return NULL;
650
651 if (self->extra) {
652 attrib = deepcopy(self->extra->attrib, memo);
653 if (!attrib) {
654 Py_DECREF(tag);
655 return NULL;
656 }
657 } else {
658 Py_INCREF(Py_None);
659 attrib = Py_None;
660 }
661
662 element = (ElementObject*) element_new(tag, attrib);
663
664 Py_DECREF(tag);
665 Py_DECREF(attrib);
666
667 if (!element)
668 return NULL;
669
670 text = deepcopy(JOIN_OBJ(self->text), memo);
671 if (!text)
672 goto error;
673 Py_DECREF(element->text);
674 element->text = JOIN_SET(text, JOIN_GET(self->text));
675
676 tail = deepcopy(JOIN_OBJ(self->tail), memo);
677 if (!tail)
678 goto error;
679 Py_DECREF(element->tail);
680 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
681
682 if (self->extra) {
683
684 if (element_resize(element, self->extra->length) < 0)
685 goto error;
686
687 for (i = 0; i < self->extra->length; i++) {
688 PyObject* child = deepcopy(self->extra->children[i], memo);
689 if (!child) {
690 element->extra->length = i;
691 goto error;
692 }
693 element->extra->children[i] = child;
694 }
695
696 element->extra->length = self->extra->length;
697
698 }
699
700 /* add object to memo dictionary (so deepcopy won't visit it again) */
701 id = PyInt_FromLong((Py_uintptr_t) self);
702
703 i = PyDict_SetItem(memo, id, (PyObject*) element);
704
705 Py_DECREF(id);
706
707 if (i < 0)
708 goto error;
709
710 return (PyObject*) element;
711
712 error:
713 Py_DECREF(element);
714 return NULL;
715}
716
717LOCAL(int)
718checkpath(PyObject* tag)
719{
720 int i, check = 1;
721
722 /* check if a tag contains an xpath character */
723
724#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
725
726#if defined(Py_USING_UNICODE)
727 if (PyUnicode_Check(tag)) {
728 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
729 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
730 if (p[i] == '{')
731 check = 0;
732 else if (p[i] == '}')
733 check = 1;
734 else if (check && PATHCHAR(p[i]))
735 return 1;
736 }
737 return 0;
738 }
739#endif
740 if (PyString_Check(tag)) {
741 char *p = PyString_AS_STRING(tag);
742 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
743 if (p[i] == '{')
744 check = 0;
745 else if (p[i] == '}')
746 check = 1;
747 else if (check && PATHCHAR(p[i]))
748 return 1;
749 }
750 return 0;
751 }
752
753 return 1; /* unknown type; might be path expression */
754}
755
756static PyObject*
757element_find(ElementObject* self, PyObject* args)
758{
759 int i;
760
761 PyObject* tag;
762 if (!PyArg_ParseTuple(args, "O:find", &tag))
763 return NULL;
764
765 if (checkpath(tag))
766 return PyObject_CallMethod(
767 elementpath_obj, "find", "OO", self, tag
768 );
769
770 if (!self->extra)
771 Py_RETURN_NONE;
772
773 for (i = 0; i < self->extra->length; i++) {
774 PyObject* item = self->extra->children[i];
775 if (Element_CheckExact(item) &&
776 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
777 Py_INCREF(item);
778 return item;
779 }
780 }
781
782 Py_RETURN_NONE;
783}
784
785static PyObject*
786element_findtext(ElementObject* self, PyObject* args)
787{
788 int i;
789
790 PyObject* tag;
791 PyObject* default_value = Py_None;
792 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
793 return NULL;
794
795 if (checkpath(tag))
796 return PyObject_CallMethod(
797 elementpath_obj, "findtext", "OOO", self, tag, default_value
798 );
799
800 if (!self->extra) {
801 Py_INCREF(default_value);
802 return default_value;
803 }
804
805 for (i = 0; i < self->extra->length; i++) {
806 ElementObject* item = (ElementObject*) self->extra->children[i];
807 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
808 PyObject* text = element_get_text(item);
809 if (text == Py_None)
810 return PyString_FromString("");
811 Py_INCREF(text);
812 return text;
813 }
814 }
815
816 Py_INCREF(default_value);
817 return default_value;
818}
819
820static PyObject*
821element_findall(ElementObject* self, PyObject* args)
822{
823 int i;
824 PyObject* out;
825
826 PyObject* tag;
827 if (!PyArg_ParseTuple(args, "O:findall", &tag))
828 return NULL;
829
830 if (checkpath(tag))
831 return PyObject_CallMethod(
832 elementpath_obj, "findall", "OO", self, tag
833 );
834
835 out = PyList_New(0);
836 if (!out)
837 return NULL;
838
839 if (!self->extra)
840 return out;
841
842 for (i = 0; i < self->extra->length; i++) {
843 PyObject* item = self->extra->children[i];
844 if (Element_CheckExact(item) &&
845 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
846 if (PyList_Append(out, item) < 0) {
847 Py_DECREF(out);
848 return NULL;
849 }
850 }
851 }
852
853 return out;
854}
855
856static PyObject*
857element_get(ElementObject* self, PyObject* args)
858{
859 PyObject* value;
860
861 PyObject* key;
862 PyObject* default_value = Py_None;
863 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
864 return NULL;
865
866 if (!self->extra || self->extra->attrib == Py_None)
867 value = default_value;
868 else {
869 value = PyDict_GetItem(self->extra->attrib, key);
870 if (!value)
871 value = default_value;
872 }
873
874 Py_INCREF(value);
875 return value;
876}
877
878static PyObject*
879element_getchildren(ElementObject* self, PyObject* args)
880{
881 int i;
882 PyObject* list;
883
884 if (!PyArg_ParseTuple(args, ":getchildren"))
885 return NULL;
886
887 if (!self->extra)
888 return PyList_New(0);
889
890 list = PyList_New(self->extra->length);
891 if (!list)
892 return NULL;
893
894 for (i = 0; i < self->extra->length; i++) {
895 PyObject* item = self->extra->children[i];
896 Py_INCREF(item);
897 PyList_SET_ITEM(list, i, item);
898 }
899
900 return list;
901}
902
903static PyObject*
904element_getiterator(ElementObject* self, PyObject* args)
905{
906 PyObject* result;
907
908 PyObject* tag = Py_None;
909 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
910 return NULL;
911
912 if (!elementtree_getiterator_obj) {
913 PyErr_SetString(
914 PyExc_RuntimeError,
915 "getiterator helper not found"
916 );
917 return NULL;
918 }
919
920 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000921 if (!args)
922 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000923
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000924 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
925 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
926
927 result = PyObject_CallObject(elementtree_getiterator_obj, args);
928
929 Py_DECREF(args);
930
931 return result;
932}
933
934static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000935element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000936{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000937 ElementObject* self = (ElementObject*) self_;
938
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000939 if (!self->extra || index < 0 || index >= self->extra->length) {
940 PyErr_SetString(
941 PyExc_IndexError,
942 "child index out of range"
943 );
944 return NULL;
945 }
946
947 Py_INCREF(self->extra->children[index]);
948 return self->extra->children[index];
949}
950
951static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000952element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000953{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000954 ElementObject* self = (ElementObject*) self_;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000955 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000956 PyObject* list;
957
958 if (!self->extra)
959 return PyList_New(0);
960
961 /* standard clamping */
962 if (start < 0)
963 start = 0;
964 if (end < 0)
965 end = 0;
966 if (end > self->extra->length)
967 end = self->extra->length;
968 if (start > end)
969 start = end;
970
971 list = PyList_New(end - start);
972 if (!list)
973 return NULL;
974
975 for (i = start; i < end; i++) {
976 PyObject* item = self->extra->children[i];
977 Py_INCREF(item);
978 PyList_SET_ITEM(list, i - start, item);
979 }
980
981 return list;
982}
983
984static PyObject*
985element_insert(ElementObject* self, PyObject* args)
986{
987 int i;
988
989 int index;
990 PyObject* element;
991 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
992 &Element_Type, &element))
993 return NULL;
994
995 if (!self->extra)
996 element_new_extra(self, NULL);
997
998 if (index < 0)
999 index = 0;
1000 if (index > self->extra->length)
1001 index = self->extra->length;
1002
1003 if (element_resize(self, 1) < 0)
1004 return NULL;
1005
1006 for (i = self->extra->length; i > index; i--)
1007 self->extra->children[i] = self->extra->children[i-1];
1008
1009 Py_INCREF(element);
1010 self->extra->children[index] = element;
1011
1012 self->extra->length++;
1013
1014 Py_RETURN_NONE;
1015}
1016
1017static PyObject*
1018element_items(ElementObject* self, PyObject* args)
1019{
1020 if (!PyArg_ParseTuple(args, ":items"))
1021 return NULL;
1022
1023 if (!self->extra || self->extra->attrib == Py_None)
1024 return PyList_New(0);
1025
1026 return PyDict_Items(self->extra->attrib);
1027}
1028
1029static PyObject*
1030element_keys(ElementObject* self, PyObject* args)
1031{
1032 if (!PyArg_ParseTuple(args, ":keys"))
1033 return NULL;
1034
1035 if (!self->extra || self->extra->attrib == Py_None)
1036 return PyList_New(0);
1037
1038 return PyDict_Keys(self->extra->attrib);
1039}
1040
Martin v. Löwis18e16552006-02-15 17:27:45 +00001041static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001042element_length(ElementObject* self)
1043{
1044 if (!self->extra)
1045 return 0;
1046
1047 return self->extra->length;
1048}
1049
1050static PyObject*
1051element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1052{
1053 PyObject* elem;
1054
1055 PyObject* tag;
1056 PyObject* attrib;
1057 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1058 return NULL;
1059
1060 attrib = PyDict_Copy(attrib);
1061 if (!attrib)
1062 return NULL;
1063
1064 elem = element_new(tag, attrib);
1065
1066 Py_DECREF(attrib);
1067
1068 return elem;
1069}
1070
1071static PyObject*
1072element_reduce(ElementObject* self, PyObject* args)
1073{
1074 if (!PyArg_ParseTuple(args, ":__reduce__"))
1075 return NULL;
1076
1077 /* Hack alert: This method is used to work around a __copy__
1078 problem on certain 2.3 and 2.4 versions. To save time and
1079 simplify the code, we create the copy in here, and use a dummy
1080 copyelement helper to trick the copy module into doing the
1081 right thing. */
1082
1083 if (!elementtree_copyelement_obj) {
1084 PyErr_SetString(
1085 PyExc_RuntimeError,
1086 "copyelement helper not found"
1087 );
1088 return NULL;
1089 }
1090
1091 return Py_BuildValue(
1092 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1093 );
1094}
1095
1096static PyObject*
1097element_remove(ElementObject* self, PyObject* args)
1098{
1099 int i;
1100
1101 PyObject* element;
1102 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1103 return NULL;
1104
1105 if (!self->extra) {
1106 /* element has no children, so raise exception */
1107 PyErr_SetString(
1108 PyExc_ValueError,
1109 "list.remove(x): x not in list"
1110 );
1111 return NULL;
1112 }
1113
1114 for (i = 0; i < self->extra->length; i++) {
1115 if (self->extra->children[i] == element)
1116 break;
1117 if (PyObject_Compare(self->extra->children[i], element) == 0)
1118 break;
1119 }
1120
1121 if (i == self->extra->length) {
1122 /* element is not in children, so raise exception */
1123 PyErr_SetString(
1124 PyExc_ValueError,
1125 "list.remove(x): x not in list"
1126 );
1127 return NULL;
1128 }
1129
1130 Py_DECREF(self->extra->children[i]);
1131
1132 self->extra->length--;
1133
1134 for (; i < self->extra->length; i++)
1135 self->extra->children[i] = self->extra->children[i+1];
1136
1137 Py_RETURN_NONE;
1138}
1139
1140static PyObject*
1141element_repr(ElementObject* self)
1142{
1143 PyObject* repr;
1144 char buffer[100];
1145
1146 repr = PyString_FromString("<Element ");
1147
1148 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag));
1149
1150 sprintf(buffer, " at %p>", self);
1151 PyString_ConcatAndDel(&repr, PyString_FromString(buffer));
1152
1153 return repr;
1154}
1155
1156static PyObject*
1157element_set(ElementObject* self, PyObject* args)
1158{
1159 PyObject* attrib;
1160
1161 PyObject* key;
1162 PyObject* value;
1163 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1164 return NULL;
1165
1166 if (!self->extra)
1167 element_new_extra(self, NULL);
1168
1169 attrib = element_get_attrib(self);
1170 if (!attrib)
1171 return NULL;
1172
1173 if (PyDict_SetItem(attrib, key, value) < 0)
1174 return NULL;
1175
1176 Py_RETURN_NONE;
1177}
1178
1179static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001180element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001181{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001182 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001183 int i, new, old;
1184 PyObject* recycle = NULL;
1185
1186 if (!self->extra)
1187 element_new_extra(self, NULL);
1188
1189 /* standard clamping */
1190 if (start < 0)
1191 start = 0;
1192 if (end < 0)
1193 end = 0;
1194 if (end > self->extra->length)
1195 end = self->extra->length;
1196 if (start > end)
1197 start = end;
1198
1199 old = end - start;
1200
1201 if (item == NULL)
1202 new = 0;
1203 else if (PyList_CheckExact(item)) {
1204 new = PyList_GET_SIZE(item);
1205 } else {
1206 /* FIXME: support arbitrary sequences? */
1207 PyErr_Format(
1208 PyExc_TypeError,
1209 "expected list, not \"%.200s\"", item->ob_type->tp_name
1210 );
1211 return -1;
1212 }
1213
1214 if (old > 0) {
1215 /* to avoid recursive calls to this method (via decref), move
1216 old items to the recycle bin here, and get rid of them when
1217 we're done modifying the element */
1218 recycle = PyList_New(old);
1219 for (i = 0; i < old; i++)
1220 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1221 }
1222
1223 if (new < old) {
1224 /* delete slice */
1225 for (i = end; i < self->extra->length; i++)
1226 self->extra->children[i + new - old] = self->extra->children[i];
1227 } else if (new > old) {
1228 /* insert slice */
1229 if (element_resize(self, new - old) < 0)
1230 return -1;
1231 for (i = self->extra->length-1; i >= end; i--)
1232 self->extra->children[i + new - old] = self->extra->children[i];
1233 }
1234
1235 /* replace the slice */
1236 for (i = 0; i < new; i++) {
1237 PyObject* element = PyList_GET_ITEM(item, i);
1238 Py_INCREF(element);
1239 self->extra->children[i + start] = element;
1240 }
1241
1242 self->extra->length += new - old;
1243
1244 /* discard the recycle bin, and everything in it */
1245 Py_XDECREF(recycle);
1246
1247 return 0;
1248}
1249
1250static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001251element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001252{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001253 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254 int i;
1255 PyObject* old;
1256
1257 if (!self->extra || index < 0 || index >= self->extra->length) {
1258 PyErr_SetString(
1259 PyExc_IndexError,
1260 "child assignment index out of range");
1261 return -1;
1262 }
1263
1264 old = self->extra->children[index];
1265
1266 if (item) {
1267 Py_INCREF(item);
1268 self->extra->children[index] = item;
1269 } else {
1270 self->extra->length--;
1271 for (i = index; i < self->extra->length; i++)
1272 self->extra->children[i] = self->extra->children[i+1];
1273 }
1274
1275 Py_DECREF(old);
1276
1277 return 0;
1278}
1279
1280static PyMethodDef element_methods[] = {
1281
1282 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1283
1284 {"get", (PyCFunction) element_get, METH_VARARGS},
1285 {"set", (PyCFunction) element_set, METH_VARARGS},
1286
1287 {"find", (PyCFunction) element_find, METH_VARARGS},
1288 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1289 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1290
1291 {"append", (PyCFunction) element_append, METH_VARARGS},
1292 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1293 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1294
1295 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1296 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1297
1298 {"items", (PyCFunction) element_items, METH_VARARGS},
1299 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1300
1301 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1302
1303 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1304 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1305
1306 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1307 C objects correctly, so we have to fake it using a __reduce__-
1308 based hack (see the element_reduce implementation above for
1309 details). */
1310
1311 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1312 using a runtime test to figure out if we need to fake things
1313 or now (see the init code below). The following entry is
1314 enabled only if the hack is needed. */
1315
1316 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1317
1318 {NULL, NULL}
1319};
1320
1321static PyObject*
1322element_getattr(ElementObject* self, char* name)
1323{
1324 PyObject* res;
1325
1326 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1327 if (res)
1328 return res;
1329
1330 PyErr_Clear();
1331
1332 if (strcmp(name, "tag") == 0)
1333 res = self->tag;
1334 else if (strcmp(name, "text") == 0)
1335 res = element_get_text(self);
1336 else if (strcmp(name, "tail") == 0) {
1337 res = element_get_tail(self);
1338 } else if (strcmp(name, "attrib") == 0) {
1339 if (!self->extra)
1340 element_new_extra(self, NULL);
1341 res = element_get_attrib(self);
1342 } else {
1343 PyErr_SetString(PyExc_AttributeError, name);
1344 return NULL;
1345 }
1346
1347 if (!res)
1348 return NULL;
1349
1350 Py_INCREF(res);
1351 return res;
1352}
1353
1354static int
1355element_setattr(ElementObject* self, const char* name, PyObject* value)
1356{
1357 if (value == NULL) {
1358 PyErr_SetString(
1359 PyExc_AttributeError,
1360 "can't delete element attributes"
1361 );
1362 return -1;
1363 }
1364
1365 if (strcmp(name, "tag") == 0) {
1366 Py_DECREF(self->tag);
1367 self->tag = value;
1368 Py_INCREF(self->tag);
1369 } else if (strcmp(name, "text") == 0) {
1370 Py_DECREF(JOIN_OBJ(self->text));
1371 self->text = value;
1372 Py_INCREF(self->text);
1373 } else if (strcmp(name, "tail") == 0) {
1374 Py_DECREF(JOIN_OBJ(self->tail));
1375 self->tail = value;
1376 Py_INCREF(self->tail);
1377 } else if (strcmp(name, "attrib") == 0) {
1378 if (!self->extra)
1379 element_new_extra(self, NULL);
1380 Py_DECREF(self->extra->attrib);
1381 self->extra->attrib = value;
1382 Py_INCREF(self->extra->attrib);
1383 } else {
1384 PyErr_SetString(PyExc_AttributeError, name);
1385 return -1;
1386 }
1387
1388 return 0;
1389}
1390
1391static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001392 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001393 0, /* sq_concat */
1394 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001395 element_getitem,
1396 element_getslice,
1397 element_setitem,
1398 element_setslice,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001399};
1400
Neal Norwitz227b5332006-03-22 09:28:35 +00001401static PyTypeObject Element_Type = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001402 PyObject_HEAD_INIT(NULL)
1403 0, "Element", sizeof(ElementObject), 0,
1404 /* methods */
1405 (destructor)element_dealloc, /* tp_dealloc */
1406 0, /* tp_print */
1407 (getattrfunc)element_getattr, /* tp_getattr */
1408 (setattrfunc)element_setattr, /* tp_setattr */
1409 0, /* tp_compare */
1410 (reprfunc)element_repr, /* tp_repr */
1411 0, /* tp_as_number */
1412 &element_as_sequence, /* tp_as_sequence */
1413};
1414
1415/* ==================================================================== */
1416/* the tree builder type */
1417
1418typedef struct {
1419 PyObject_HEAD
1420
1421 PyObject* root; /* root node (first created node) */
1422
1423 ElementObject* this; /* current node */
1424 ElementObject* last; /* most recently created node */
1425
1426 PyObject* data; /* data collector (string or list), or NULL */
1427
1428 PyObject* stack; /* element stack */
1429 int index; /* current stack size (0=empty) */
1430
1431 /* element tracing */
1432 PyObject* events; /* list of events, or NULL if not collecting */
1433 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1434 PyObject* end_event_obj;
1435 PyObject* start_ns_event_obj;
1436 PyObject* end_ns_event_obj;
1437
1438} TreeBuilderObject;
1439
Neal Norwitz227b5332006-03-22 09:28:35 +00001440static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001441
1442#define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type)
1443
1444/* -------------------------------------------------------------------- */
1445/* constructor and destructor */
1446
1447LOCAL(PyObject*)
1448treebuilder_new(void)
1449{
1450 TreeBuilderObject* self;
1451
1452 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1453 if (self == NULL)
1454 return NULL;
1455
1456 self->root = NULL;
1457
1458 Py_INCREF(Py_None);
1459 self->this = (ElementObject*) Py_None;
1460
1461 Py_INCREF(Py_None);
1462 self->last = (ElementObject*) Py_None;
1463
1464 self->data = NULL;
1465
1466 self->stack = PyList_New(20);
1467 self->index = 0;
1468
1469 self->events = NULL;
1470 self->start_event_obj = self->end_event_obj = NULL;
1471 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1472
1473 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1474
1475 return (PyObject*) self;
1476}
1477
1478static PyObject*
1479treebuilder(PyObject* _self, PyObject* args)
1480{
1481 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1482 return NULL;
1483
1484 return treebuilder_new();
1485}
1486
1487static void
1488treebuilder_dealloc(TreeBuilderObject* self)
1489{
1490 Py_XDECREF(self->end_ns_event_obj);
1491 Py_XDECREF(self->start_ns_event_obj);
1492 Py_XDECREF(self->end_event_obj);
1493 Py_XDECREF(self->start_event_obj);
1494 Py_XDECREF(self->events);
1495 Py_DECREF(self->stack);
1496 Py_XDECREF(self->data);
1497 Py_DECREF(self->last);
1498 Py_DECREF(self->this);
1499 Py_XDECREF(self->root);
1500
1501 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1502
1503 PyObject_Del(self);
1504}
1505
1506/* -------------------------------------------------------------------- */
1507/* handlers */
1508
1509LOCAL(PyObject*)
1510treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1511 PyObject* standalone)
1512{
1513 Py_RETURN_NONE;
1514}
1515
1516LOCAL(PyObject*)
1517treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1518 PyObject* attrib)
1519{
1520 PyObject* node;
1521 PyObject* this;
1522
1523 if (self->data) {
1524 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001525 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001526 self->last->text = JOIN_SET(
1527 self->data, PyList_CheckExact(self->data)
1528 );
1529 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001530 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001531 self->last->tail = JOIN_SET(
1532 self->data, PyList_CheckExact(self->data)
1533 );
1534 }
1535 self->data = NULL;
1536 }
1537
1538 node = element_new(tag, attrib);
1539 if (!node)
1540 return NULL;
1541
1542 this = (PyObject*) self->this;
1543
1544 if (this != Py_None) {
1545 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001546 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001547 } else {
1548 if (self->root) {
1549 PyErr_SetString(
1550 PyExc_SyntaxError,
1551 "multiple elements on top level"
1552 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001553 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001554 }
1555 Py_INCREF(node);
1556 self->root = node;
1557 }
1558
1559 if (self->index < PyList_GET_SIZE(self->stack)) {
1560 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001561 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001562 Py_INCREF(this);
1563 } else {
1564 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001565 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001566 }
1567 self->index++;
1568
1569 Py_DECREF(this);
1570 Py_INCREF(node);
1571 self->this = (ElementObject*) node;
1572
1573 Py_DECREF(self->last);
1574 Py_INCREF(node);
1575 self->last = (ElementObject*) node;
1576
1577 if (self->start_event_obj) {
1578 PyObject* res;
1579 PyObject* action = self->start_event_obj;
1580 res = PyTuple_New(2);
1581 if (res) {
1582 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1583 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1584 PyList_Append(self->events, res);
1585 Py_DECREF(res);
1586 } else
1587 PyErr_Clear(); /* FIXME: propagate error */
1588 }
1589
1590 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001591
1592 error:
1593 Py_DECREF(node);
1594 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001595}
1596
1597LOCAL(PyObject*)
1598treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1599{
1600 if (!self->data) {
1601 /* store the first item as is */
1602 Py_INCREF(data); self->data = data;
1603 } else {
1604 /* more than one item; use a list to collect items */
1605 if (PyString_CheckExact(self->data) && self->data->ob_refcnt == 1 &&
1606 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1607 /* expat often generates single character data sections; handle
1608 the most common case by resizing the existing string... */
1609 int size = PyString_GET_SIZE(self->data);
1610 if (_PyString_Resize(&self->data, size + 1) < 0)
1611 return NULL;
1612 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1613 } else if (PyList_CheckExact(self->data)) {
1614 if (PyList_Append(self->data, data) < 0)
1615 return NULL;
1616 } else {
1617 PyObject* list = PyList_New(2);
1618 if (!list)
1619 return NULL;
1620 PyList_SET_ITEM(list, 0, self->data);
1621 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1622 self->data = list;
1623 }
1624 }
1625
1626 Py_RETURN_NONE;
1627}
1628
1629LOCAL(PyObject*)
1630treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1631{
1632 PyObject* item;
1633
1634 if (self->data) {
1635 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001636 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001637 self->last->text = JOIN_SET(
1638 self->data, PyList_CheckExact(self->data)
1639 );
1640 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001641 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001642 self->last->tail = JOIN_SET(
1643 self->data, PyList_CheckExact(self->data)
1644 );
1645 }
1646 self->data = NULL;
1647 }
1648
1649 if (self->index == 0) {
1650 PyErr_SetString(
1651 PyExc_IndexError,
1652 "pop from empty stack"
1653 );
1654 return NULL;
1655 }
1656
1657 self->index--;
1658
1659 item = PyList_GET_ITEM(self->stack, self->index);
1660 Py_INCREF(item);
1661
1662 Py_DECREF(self->last);
1663
1664 self->last = (ElementObject*) self->this;
1665 self->this = (ElementObject*) item;
1666
1667 if (self->end_event_obj) {
1668 PyObject* res;
1669 PyObject* action = self->end_event_obj;
1670 PyObject* node = (PyObject*) self->last;
1671 res = PyTuple_New(2);
1672 if (res) {
1673 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1674 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1675 PyList_Append(self->events, res);
1676 Py_DECREF(res);
1677 } else
1678 PyErr_Clear(); /* FIXME: propagate error */
1679 }
1680
1681 Py_INCREF(self->last);
1682 return (PyObject*) self->last;
1683}
1684
1685LOCAL(void)
1686treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1687 const char* prefix, const char *uri)
1688{
1689 PyObject* res;
1690 PyObject* action;
1691 PyObject* parcel;
1692
1693 if (!self->events)
1694 return;
1695
1696 if (start) {
1697 if (!self->start_ns_event_obj)
1698 return;
1699 action = self->start_ns_event_obj;
1700 /* FIXME: prefix and uri use utf-8 encoding! */
1701 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1702 if (!parcel)
1703 return;
1704 Py_INCREF(action);
1705 } else {
1706 if (!self->end_ns_event_obj)
1707 return;
1708 action = self->end_ns_event_obj;
1709 Py_INCREF(action);
1710 parcel = Py_None;
1711 Py_INCREF(parcel);
1712 }
1713
1714 res = PyTuple_New(2);
1715
1716 if (res) {
1717 PyTuple_SET_ITEM(res, 0, action);
1718 PyTuple_SET_ITEM(res, 1, parcel);
1719 PyList_Append(self->events, res);
1720 Py_DECREF(res);
1721 } else
1722 PyErr_Clear(); /* FIXME: propagate error */
1723}
1724
1725/* -------------------------------------------------------------------- */
1726/* methods (in alphabetical order) */
1727
1728static PyObject*
1729treebuilder_data(TreeBuilderObject* self, PyObject* args)
1730{
1731 PyObject* data;
1732 if (!PyArg_ParseTuple(args, "O:data", &data))
1733 return NULL;
1734
1735 return treebuilder_handle_data(self, data);
1736}
1737
1738static PyObject*
1739treebuilder_end(TreeBuilderObject* self, PyObject* args)
1740{
1741 PyObject* tag;
1742 if (!PyArg_ParseTuple(args, "O:end", &tag))
1743 return NULL;
1744
1745 return treebuilder_handle_end(self, tag);
1746}
1747
1748LOCAL(PyObject*)
1749treebuilder_done(TreeBuilderObject* self)
1750{
1751 PyObject* res;
1752
1753 /* FIXME: check stack size? */
1754
1755 if (self->root)
1756 res = self->root;
1757 else
1758 res = Py_None;
1759
1760 Py_INCREF(res);
1761 return res;
1762}
1763
1764static PyObject*
1765treebuilder_close(TreeBuilderObject* self, PyObject* args)
1766{
1767 if (!PyArg_ParseTuple(args, ":close"))
1768 return NULL;
1769
1770 return treebuilder_done(self);
1771}
1772
1773static PyObject*
1774treebuilder_start(TreeBuilderObject* self, PyObject* args)
1775{
1776 PyObject* tag;
1777 PyObject* attrib = Py_None;
1778 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1779 return NULL;
1780
1781 return treebuilder_handle_start(self, tag, attrib);
1782}
1783
1784static PyObject*
1785treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1786{
1787 PyObject* encoding;
1788 PyObject* standalone;
1789 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1790 return NULL;
1791
1792 return treebuilder_handle_xml(self, encoding, standalone);
1793}
1794
1795static PyMethodDef treebuilder_methods[] = {
1796 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1797 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1798 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1799 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1800 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1801 {NULL, NULL}
1802};
1803
1804static PyObject*
1805treebuilder_getattr(TreeBuilderObject* self, char* name)
1806{
1807 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1808}
1809
Neal Norwitz227b5332006-03-22 09:28:35 +00001810static PyTypeObject TreeBuilder_Type = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001811 PyObject_HEAD_INIT(NULL)
1812 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
1813 /* methods */
1814 (destructor)treebuilder_dealloc, /* tp_dealloc */
1815 0, /* tp_print */
1816 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1817};
1818
1819/* ==================================================================== */
1820/* the expat interface */
1821
1822#if defined(USE_EXPAT)
1823
1824#include "expat.h"
1825
/* EXPAT(func) names an expat entry point: a member of the pyexpat
   dispatch table when USE_PYEXPAT_CAPI is defined, otherwise the
   XML_-prefixed function itself. */
#if defined(USE_PYEXPAT_CAPI)
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
1833
1834typedef struct {
1835 PyObject_HEAD
1836
1837 XML_Parser parser;
1838
1839 PyObject* target;
1840 PyObject* entity;
1841
1842 PyObject* names;
1843
1844 PyObject* handle_xml;
1845 PyObject* handle_start;
1846 PyObject* handle_data;
1847 PyObject* handle_end;
1848
1849 PyObject* handle_comment;
1850 PyObject* handle_pi;
1851
1852} XMLParserObject;
1853
Neal Norwitz227b5332006-03-22 09:28:35 +00001854static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001855
1856/* helpers */
1857
#if defined(Py_USING_UNICODE)
/* Return 1 if any of the first 'size' bytes of 'string' has its high
   bit set (i.e. the 8-bit string contains UTF-8 characters), else 0. */
LOCAL(int)
checkstring(const char* string, int size)
{
    const char* p = string;
    int remaining = size;

    while (remaining > 0) {
        if (*p & 0x80)
            return 1;
        p++;
        remaining--;
    }

    return 0;
}
#endif
1872
1873LOCAL(PyObject*)
1874makestring(const char* string, int size)
1875{
1876 /* convert a UTF-8 string to either a 7-bit ascii string or a
1877 Unicode string */
1878
1879#if defined(Py_USING_UNICODE)
1880 if (checkstring(string, size))
1881 return PyUnicode_DecodeUTF8(string, size, "strict");
1882#endif
1883
1884 return PyString_FromStringAndSize(string, size);
1885}
1886
1887LOCAL(PyObject*)
1888makeuniversal(XMLParserObject* self, const char* string)
1889{
1890 /* convert a UTF-8 tag/attribute name from the expat parser
1891 to a universal name string */
1892
1893 int size = strlen(string);
1894 PyObject* key;
1895 PyObject* value;
1896
1897 /* look the 'raw' name up in the names dictionary */
1898 key = PyString_FromStringAndSize(string, size);
1899 if (!key)
1900 return NULL;
1901
1902 value = PyDict_GetItem(self->names, key);
1903
1904 if (value) {
1905 Py_INCREF(value);
1906 } else {
1907 /* new name. convert to universal name, and decode as
1908 necessary */
1909
1910 PyObject* tag;
1911 char* p;
1912 int i;
1913
1914 /* look for namespace separator */
1915 for (i = 0; i < size; i++)
1916 if (string[i] == '}')
1917 break;
1918 if (i != size) {
1919 /* convert to universal name */
1920 tag = PyString_FromStringAndSize(NULL, size+1);
1921 p = PyString_AS_STRING(tag);
1922 p[0] = '{';
1923 memcpy(p+1, string, size);
1924 size++;
1925 } else {
1926 /* plain name; use key as tag */
1927 Py_INCREF(key);
1928 tag = key;
1929 }
1930
1931 /* decode universal name */
1932#if defined(Py_USING_UNICODE)
1933 /* inline makestring, to avoid duplicating the source string if
1934 it's not an utf-8 string */
1935 p = PyString_AS_STRING(tag);
1936 if (checkstring(p, size)) {
1937 value = PyUnicode_DecodeUTF8(p, size, "strict");
1938 Py_DECREF(tag);
1939 if (!value) {
1940 Py_DECREF(key);
1941 return NULL;
1942 }
1943 } else
1944#endif
1945 value = tag; /* use tag as is */
1946
1947 /* add to names dictionary */
1948 if (PyDict_SetItem(self->names, key, value) < 0) {
1949 Py_DECREF(key);
1950 Py_DECREF(value);
1951 return NULL;
1952 }
1953 }
1954
1955 Py_DECREF(key);
1956 return value;
1957}
1958
1959/* -------------------------------------------------------------------- */
1960/* handlers */
1961
1962static void
1963expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1964 int data_len)
1965{
1966 PyObject* key;
1967 PyObject* value;
1968 PyObject* res;
1969
1970 if (data_len < 2 || data_in[0] != '&')
1971 return;
1972
1973 key = makestring(data_in + 1, data_len - 2);
1974 if (!key)
1975 return;
1976
1977 value = PyDict_GetItem(self->entity, key);
1978
1979 if (value) {
1980 if (TreeBuilder_CheckExact(self->target))
1981 res = treebuilder_handle_data(
1982 (TreeBuilderObject*) self->target, value
1983 );
1984 else if (self->handle_data)
1985 res = PyObject_CallFunction(self->handle_data, "O", value);
1986 else
1987 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001988 Py_XDECREF(res);
1989 } else {
1990 PyErr_Format(
1991 PyExc_SyntaxError, "undefined entity &%s;: line %d, column %d",
1992 PyString_AS_STRING(key),
1993 EXPAT(GetErrorLineNumber)(self->parser),
1994 EXPAT(GetErrorColumnNumber)(self->parser)
1995 );
1996 }
1997
1998 Py_DECREF(key);
1999}
2000
2001static void
2002expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2003 const XML_Char **attrib_in)
2004{
2005 PyObject* res;
2006 PyObject* tag;
2007 PyObject* attrib;
2008 int ok;
2009
2010 /* tag name */
2011 tag = makeuniversal(self, tag_in);
2012 if (!tag)
2013 return; /* parser will look for errors */
2014
2015 /* attributes */
2016 if (attrib_in[0]) {
2017 attrib = PyDict_New();
2018 if (!attrib)
2019 return;
2020 while (attrib_in[0] && attrib_in[1]) {
2021 PyObject* key = makeuniversal(self, attrib_in[0]);
2022 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2023 if (!key || !value) {
2024 Py_XDECREF(value);
2025 Py_XDECREF(key);
2026 Py_DECREF(attrib);
2027 return;
2028 }
2029 ok = PyDict_SetItem(attrib, key, value);
2030 Py_DECREF(value);
2031 Py_DECREF(key);
2032 if (ok < 0) {
2033 Py_DECREF(attrib);
2034 return;
2035 }
2036 attrib_in += 2;
2037 }
2038 } else {
2039 Py_INCREF(Py_None);
2040 attrib = Py_None;
2041 }
2042
2043 if (TreeBuilder_CheckExact(self->target))
2044 /* shortcut */
2045 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2046 tag, attrib);
2047 else if (self->handle_start)
2048 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2049 else
2050 res = NULL;
2051
2052 Py_DECREF(tag);
2053 Py_DECREF(attrib);
2054
2055 Py_XDECREF(res);
2056}
2057
2058static void
2059expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2060 int data_len)
2061{
2062 PyObject* data;
2063 PyObject* res;
2064
2065 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002066 if (!data)
2067 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002068
2069 if (TreeBuilder_CheckExact(self->target))
2070 /* shortcut */
2071 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2072 else if (self->handle_data)
2073 res = PyObject_CallFunction(self->handle_data, "O", data);
2074 else
2075 res = NULL;
2076
2077 Py_DECREF(data);
2078
2079 Py_XDECREF(res);
2080}
2081
2082static void
2083expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2084{
2085 PyObject* tag;
2086 PyObject* res = NULL;
2087
2088 if (TreeBuilder_CheckExact(self->target))
2089 /* shortcut */
2090 /* the standard tree builder doesn't look at the end tag */
2091 res = treebuilder_handle_end(
2092 (TreeBuilderObject*) self->target, Py_None
2093 );
2094 else if (self->handle_end) {
2095 tag = makeuniversal(self, tag_in);
2096 if (tag) {
2097 res = PyObject_CallFunction(self->handle_end, "O", tag);
2098 Py_DECREF(tag);
2099 }
2100 }
2101
2102 Py_XDECREF(res);
2103}
2104
2105static void
2106expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2107 const XML_Char *uri)
2108{
2109 treebuilder_handle_namespace(
2110 (TreeBuilderObject*) self->target, 1, prefix, uri
2111 );
2112}
2113
2114static void
2115expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2116{
2117 treebuilder_handle_namespace(
2118 (TreeBuilderObject*) self->target, 0, NULL, NULL
2119 );
2120}
2121
2122static void
2123expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2124{
2125 PyObject* comment;
2126 PyObject* res;
2127
2128 if (self->handle_comment) {
2129 comment = makestring(comment_in, strlen(comment_in));
2130 if (comment) {
2131 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2132 Py_XDECREF(res);
2133 Py_DECREF(comment);
2134 }
2135 }
2136}
2137
2138static void
2139expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2140 const XML_Char* data_in)
2141{
2142 PyObject* target;
2143 PyObject* data;
2144 PyObject* res;
2145
2146 if (self->handle_pi) {
2147 target = makestring(target_in, strlen(target_in));
2148 data = makestring(data_in, strlen(data_in));
2149 if (target && data) {
2150 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2151 Py_XDECREF(res);
2152 Py_DECREF(data);
2153 Py_DECREF(target);
2154 } else {
2155 Py_XDECREF(data);
2156 Py_XDECREF(target);
2157 }
2158 }
2159}
2160
#if defined(Py_USING_UNICODE)
/* expat unknown-encoding handler.

   Builds the byte-to-character map for the encoding 'name' by letting
   Python decode the 256 possible byte values.  Bytes that decode to
   the replacement character are mapped to -1.  Returns
   XML_STATUS_ERROR if the decoding fails or does not produce exactly
   256 characters, XML_STATUS_OK otherwise. */
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    unsigned char bytes[256];
    PyObject* decoded;
    Py_UNICODE* chars;
    int ch;

    memset(info, 0, sizeof(XML_Encoding));

    /* one byte for each possible value, in order */
    for (ch = 0; ch < 256; ch++)
        bytes[ch] = ch;

    decoded = PyUnicode_Decode((char*) bytes, 256, name, "replace");
    if (!decoded)
        return XML_STATUS_ERROR;

    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (ch = 0; ch < 256; ch++) {
        if (chars[ch] == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[ch] = -1;
        else
            info->map[ch] = chars[ch];
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2199
2200/* -------------------------------------------------------------------- */
2201/* constructor and destructor */
2202
2203static PyObject*
2204xmlparser(PyObject* _self, PyObject* args, PyObject* kw)
2205{
2206 XMLParserObject* self;
2207 /* FIXME: does this need to be static? */
2208 static XML_Memory_Handling_Suite memory_handler;
2209
2210 PyObject* target = NULL;
2211 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002212 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002213 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2214 &target, &encoding))
2215 return NULL;
2216
2217#if defined(USE_PYEXPAT_CAPI)
2218 if (!expat_capi) {
2219 PyErr_SetString(
2220 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2221 );
2222 return NULL;
2223 }
2224#endif
2225
2226 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2227 if (self == NULL)
2228 return NULL;
2229
2230 self->entity = PyDict_New();
2231 if (!self->entity) {
2232 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002233 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002234 }
2235
2236 self->names = PyDict_New();
2237 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002238 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002239 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002240 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002241 }
2242
2243 memory_handler.malloc_fcn = PyObject_Malloc;
2244 memory_handler.realloc_fcn = PyObject_Realloc;
2245 memory_handler.free_fcn = PyObject_Free;
2246
2247 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2248 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002249 PyObject_Del(self->names);
2250 PyObject_Del(self->entity);
2251 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002252 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002253 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002254 }
2255
2256 /* setup target handlers */
2257 if (!target) {
2258 target = treebuilder_new();
2259 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002260 EXPAT(ParserFree)(self->parser);
2261 PyObject_Del(self->names);
2262 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002263 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002264 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002265 }
2266 } else
2267 Py_INCREF(target);
2268 self->target = target;
2269
2270 self->handle_xml = PyObject_GetAttrString(target, "xml");
2271 self->handle_start = PyObject_GetAttrString(target, "start");
2272 self->handle_data = PyObject_GetAttrString(target, "data");
2273 self->handle_end = PyObject_GetAttrString(target, "end");
2274 self->handle_comment = PyObject_GetAttrString(target, "comment");
2275 self->handle_pi = PyObject_GetAttrString(target, "pi");
2276
2277 PyErr_Clear();
2278
2279 /* configure parser */
2280 EXPAT(SetUserData)(self->parser, self);
2281 EXPAT(SetElementHandler)(
2282 self->parser,
2283 (XML_StartElementHandler) expat_start_handler,
2284 (XML_EndElementHandler) expat_end_handler
2285 );
2286 EXPAT(SetDefaultHandlerExpand)(
2287 self->parser,
2288 (XML_DefaultHandler) expat_default_handler
2289 );
2290 EXPAT(SetCharacterDataHandler)(
2291 self->parser,
2292 (XML_CharacterDataHandler) expat_data_handler
2293 );
2294 if (self->handle_comment)
2295 EXPAT(SetCommentHandler)(
2296 self->parser,
2297 (XML_CommentHandler) expat_comment_handler
2298 );
2299 if (self->handle_pi)
2300 EXPAT(SetProcessingInstructionHandler)(
2301 self->parser,
2302 (XML_ProcessingInstructionHandler) expat_pi_handler
2303 );
2304#if defined(Py_USING_UNICODE)
2305 EXPAT(SetUnknownEncodingHandler)(
2306 self->parser,
2307 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2308 );
2309#endif
2310
2311 ALLOC(sizeof(XMLParserObject), "create expatparser");
2312
2313 return (PyObject*) self;
2314}
2315
2316static void
2317xmlparser_dealloc(XMLParserObject* self)
2318{
2319 EXPAT(ParserFree)(self->parser);
2320
2321 Py_XDECREF(self->handle_pi);
2322 Py_XDECREF(self->handle_comment);
2323 Py_XDECREF(self->handle_end);
2324 Py_XDECREF(self->handle_data);
2325 Py_XDECREF(self->handle_start);
2326 Py_XDECREF(self->handle_xml);
2327
2328 Py_DECREF(self->target);
2329 Py_DECREF(self->entity);
2330 Py_DECREF(self->names);
2331
2332 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2333
2334 PyObject_Del(self);
2335}
2336
2337/* -------------------------------------------------------------------- */
2338/* methods (in alphabetical order) */
2339
2340LOCAL(PyObject*)
2341expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2342{
2343 int ok;
2344
2345 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2346
2347 if (PyErr_Occurred())
2348 return NULL;
2349
2350 if (!ok) {
2351 PyErr_Format(
2352 PyExc_SyntaxError, "%s: line %d, column %d",
2353 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2354 EXPAT(GetErrorLineNumber)(self->parser),
2355 EXPAT(GetErrorColumnNumber)(self->parser)
2356 );
2357 return NULL;
2358 }
2359
2360 Py_RETURN_NONE;
2361}
2362
2363static PyObject*
2364xmlparser_close(XMLParserObject* self, PyObject* args)
2365{
2366 /* end feeding data to parser */
2367
2368 PyObject* res;
2369 if (!PyArg_ParseTuple(args, ":close"))
2370 return NULL;
2371
2372 res = expat_parse(self, "", 0, 1);
2373
2374 if (res && TreeBuilder_CheckExact(self->target)) {
2375 Py_DECREF(res);
2376 return treebuilder_done((TreeBuilderObject*) self->target);
2377 }
2378
2379 return res;
2380}
2381
2382static PyObject*
2383xmlparser_feed(XMLParserObject* self, PyObject* args)
2384{
2385 /* feed data to parser */
2386
2387 char* data;
2388 int data_len;
2389 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2390 return NULL;
2391
2392 return expat_parse(self, data, data_len, 0);
2393}
2394
2395static PyObject*
2396xmlparser_parse(XMLParserObject* self, PyObject* args)
2397{
2398 /* (internal) parse until end of input stream */
2399
2400 PyObject* reader;
2401 PyObject* buffer;
2402 PyObject* res;
2403
2404 PyObject* fileobj;
2405 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2406 return NULL;
2407
2408 reader = PyObject_GetAttrString(fileobj, "read");
2409 if (!reader)
2410 return NULL;
2411
2412 /* read from open file object */
2413 for (;;) {
2414
2415 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2416
2417 if (!buffer) {
2418 /* read failed (e.g. due to KeyboardInterrupt) */
2419 Py_DECREF(reader);
2420 return NULL;
2421 }
2422
2423 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2424 Py_DECREF(buffer);
2425 break;
2426 }
2427
2428 res = expat_parse(
2429 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2430 );
2431
2432 Py_DECREF(buffer);
2433
2434 if (!res) {
2435 Py_DECREF(reader);
2436 return NULL;
2437 }
2438 Py_DECREF(res);
2439
2440 }
2441
2442 Py_DECREF(reader);
2443
2444 res = expat_parse(self, "", 0, 1);
2445
2446 if (res && TreeBuilder_CheckExact(self->target)) {
2447 Py_DECREF(res);
2448 return treebuilder_done((TreeBuilderObject*) self->target);
2449 }
2450
2451 return res;
2452}
2453
2454static PyObject*
2455xmlparser_setevents(XMLParserObject* self, PyObject* args)
2456{
2457 /* activate element event reporting */
2458
2459 int i;
2460 TreeBuilderObject* target;
2461
2462 PyObject* events; /* event collector */
2463 PyObject* event_set = Py_None;
2464 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2465 &event_set))
2466 return NULL;
2467
2468 if (!TreeBuilder_CheckExact(self->target)) {
2469 PyErr_SetString(
2470 PyExc_TypeError,
2471 "event handling only supported for cElementTree.Treebuilder "
2472 "targets"
2473 );
2474 return NULL;
2475 }
2476
2477 target = (TreeBuilderObject*) self->target;
2478
2479 Py_INCREF(events);
2480 Py_XDECREF(target->events);
2481 target->events = events;
2482
2483 /* clear out existing events */
2484 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2485 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2486 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2487 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2488
2489 if (event_set == Py_None) {
2490 /* default is "end" only */
2491 target->end_event_obj = PyString_FromString("end");
2492 Py_RETURN_NONE;
2493 }
2494
2495 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2496 goto error;
2497
2498 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2499 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2500 char* event;
2501 if (!PyString_Check(item))
2502 goto error;
2503 event = PyString_AS_STRING(item);
2504 if (strcmp(event, "start") == 0) {
2505 Py_INCREF(item);
2506 target->start_event_obj = item;
2507 } else if (strcmp(event, "end") == 0) {
2508 Py_INCREF(item);
2509 Py_XDECREF(target->end_event_obj);
2510 target->end_event_obj = item;
2511 } else if (strcmp(event, "start-ns") == 0) {
2512 Py_INCREF(item);
2513 Py_XDECREF(target->start_ns_event_obj);
2514 target->start_ns_event_obj = item;
2515 EXPAT(SetNamespaceDeclHandler)(
2516 self->parser,
2517 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2518 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2519 );
2520 } else if (strcmp(event, "end-ns") == 0) {
2521 Py_INCREF(item);
2522 Py_XDECREF(target->end_ns_event_obj);
2523 target->end_ns_event_obj = item;
2524 EXPAT(SetNamespaceDeclHandler)(
2525 self->parser,
2526 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2527 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2528 );
2529 } else {
2530 PyErr_Format(
2531 PyExc_ValueError,
2532 "unknown event '%s'", event
2533 );
2534 return NULL;
2535 }
2536 }
2537
2538 Py_RETURN_NONE;
2539
2540 error:
2541 PyErr_SetString(
2542 PyExc_TypeError,
2543 "invalid event tuple"
2544 );
2545 return NULL;
2546}
2547
2548static PyMethodDef xmlparser_methods[] = {
2549 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2550 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2551 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2552 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2553 {NULL, NULL}
2554};
2555
2556static PyObject*
2557xmlparser_getattr(XMLParserObject* self, char* name)
2558{
2559 PyObject* res;
2560
2561 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2562 if (res)
2563 return res;
2564
2565 PyErr_Clear();
2566
2567 if (strcmp(name, "entity") == 0)
2568 res = self->entity;
2569 else if (strcmp(name, "target") == 0)
2570 res = self->target;
2571 else if (strcmp(name, "version") == 0) {
2572 char buffer[100];
2573 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2574 XML_MINOR_VERSION, XML_MICRO_VERSION);
2575 return PyString_FromString(buffer);
2576 } else {
2577 PyErr_SetString(PyExc_AttributeError, name);
2578 return NULL;
2579 }
2580
2581 Py_INCREF(res);
2582 return res;
2583}
2584
Neal Norwitz227b5332006-03-22 09:28:35 +00002585static PyTypeObject XMLParser_Type = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002586 PyObject_HEAD_INIT(NULL)
2587 0, "XMLParser", sizeof(XMLParserObject), 0,
2588 /* methods */
2589 (destructor)xmlparser_dealloc, /* tp_dealloc */
2590 0, /* tp_print */
2591 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2592};
2593
2594#endif
2595
2596/* ==================================================================== */
2597/* python module interface */
2598
2599static PyMethodDef _functions[] = {
2600 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2601 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2602 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2603#if defined(USE_EXPAT)
2604 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2605 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2606#endif
2607 {NULL, NULL}
2608};
2609
2610DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002611init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002612{
2613 PyObject* m;
2614 PyObject* g;
2615 char* bootstrap;
2616#if defined(USE_PYEXPAT_CAPI)
2617 struct PyExpat_CAPI* capi;
2618#endif
2619
2620 /* Patch object type */
2621 Element_Type.ob_type = TreeBuilder_Type.ob_type = &PyType_Type;
2622#if defined(USE_EXPAT)
2623 XMLParser_Type.ob_type = &PyType_Type;
2624#endif
2625
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002626 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002627 if (!m)
2628 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002629
2630 /* python glue code */
2631
2632 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002633 if (!g)
2634 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002635
2636 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2637
2638 bootstrap = (
2639
2640#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2641 "from __future__ import generators\n" /* enable yield under 2.2 */
2642#endif
2643
2644 "from copy import copy, deepcopy\n"
2645
2646 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002647 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002648 "except ImportError:\n"
2649 " import ElementTree\n"
2650 "ET = ElementTree\n"
2651 "del ElementTree\n"
2652
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002653 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002654
2655 "try:\n" /* check if copy works as is */
2656 " copy(cElementTree.Element('x'))\n"
2657 "except:\n"
2658 " def copyelement(elem):\n"
2659 " return elem\n"
2660
2661 "def Comment(text=None):\n" /* public */
2662 " element = cElementTree.Element(ET.Comment)\n"
2663 " element.text = text\n"
2664 " return element\n"
2665 "cElementTree.Comment = Comment\n"
2666
2667 "class ElementTree(ET.ElementTree):\n" /* public */
2668 " def parse(self, source, parser=None):\n"
2669 " if not hasattr(source, 'read'):\n"
2670 " source = open(source, 'rb')\n"
2671 " if parser is not None:\n"
2672 " while 1:\n"
2673 " data = source.read(65536)\n"
2674 " if not data:\n"
2675 " break\n"
2676 " parser.feed(data)\n"
2677 " self._root = parser.close()\n"
2678 " else:\n"
2679 " parser = cElementTree.XMLParser()\n"
2680 " self._root = parser._parse(source)\n"
2681 " return self._root\n"
2682 "cElementTree.ElementTree = ElementTree\n"
2683
2684 "def getiterator(node, tag=None):\n" /* helper */
2685 " if tag == '*':\n"
2686 " tag = None\n"
2687#if (PY_VERSION_HEX < 0x02020000)
2688 " nodes = []\n" /* 2.1 doesn't have yield */
2689 " if tag is None or node.tag == tag:\n"
2690 " nodes.append(node)\n"
2691 " for node in node:\n"
2692 " nodes.extend(getiterator(node, tag))\n"
2693 " return nodes\n"
2694#else
2695 " if tag is None or node.tag == tag:\n"
2696 " yield node\n"
2697 " for node in node:\n"
2698 " for node in getiterator(node, tag):\n"
2699 " yield node\n"
2700#endif
2701
2702 "def parse(source, parser=None):\n" /* public */
2703 " tree = ElementTree()\n"
2704 " tree.parse(source, parser)\n"
2705 " return tree\n"
2706 "cElementTree.parse = parse\n"
2707
2708#if (PY_VERSION_HEX < 0x02020000)
2709 "if hasattr(ET, 'iterparse'):\n"
2710 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2711#else
2712 "class iterparse(object):\n"
2713 " root = None\n"
2714 " def __init__(self, file, events=None):\n"
2715 " if not hasattr(file, 'read'):\n"
2716 " file = open(file, 'rb')\n"
2717 " self._file = file\n"
2718 " self._events = events\n"
2719 " def __iter__(self):\n"
2720 " events = []\n"
2721 " b = cElementTree.TreeBuilder()\n"
2722 " p = cElementTree.XMLParser(b)\n"
2723 " p._setevents(events, self._events)\n"
2724 " while 1:\n"
2725 " data = self._file.read(16384)\n"
2726 " if not data:\n"
2727 " break\n"
2728 " p.feed(data)\n"
2729 " for event in events:\n"
2730 " yield event\n"
2731 " del events[:]\n"
2732 " root = p.close()\n"
2733 " for event in events:\n"
2734 " yield event\n"
2735 " self.root = root\n"
2736 "cElementTree.iterparse = iterparse\n"
2737#endif
2738
2739 "def PI(target, text=None):\n" /* public */
2740 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2741 " element.text = target\n"
2742 " if text:\n"
2743 " element.text = element.text + ' ' + text\n"
2744 " return element\n"
2745
2746 " elem = cElementTree.Element(ET.PI)\n"
2747 " elem.text = text\n"
2748 " return elem\n"
2749 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2750
2751 "def XML(text):\n" /* public */
2752 " parser = cElementTree.XMLParser()\n"
2753 " parser.feed(text)\n"
2754 " return parser.close()\n"
2755 "cElementTree.XML = cElementTree.fromstring = XML\n"
2756
2757 "def XMLID(text):\n" /* public */
2758 " tree = XML(text)\n"
2759 " ids = {}\n"
2760 " for elem in tree.getiterator():\n"
2761 " id = elem.get('id')\n"
2762 " if id:\n"
2763 " ids[id] = elem\n"
2764 " return tree, ids\n"
2765 "cElementTree.XMLID = XMLID\n"
2766
2767 "cElementTree.dump = ET.dump\n"
2768 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2769 "cElementTree.iselement = ET.iselement\n"
2770 "cElementTree.QName = ET.QName\n"
2771 "cElementTree.tostring = ET.tostring\n"
2772 "cElementTree.VERSION = '" VERSION "'\n"
2773 "cElementTree.__version__ = '" VERSION "'\n"
2774 "cElementTree.XMLParserError = SyntaxError\n"
2775
2776 );
2777
2778 PyRun_String(bootstrap, Py_file_input, g, NULL);
2779
2780 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2781
2782 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2783 if (elementtree_copyelement_obj) {
2784 /* reduce hack needed; enable reduce method */
2785 PyMethodDef* mp;
2786 for (mp = element_methods; mp->ml_name; mp++)
2787 if (mp->ml_meth == (PyCFunction) element_reduce) {
2788 mp->ml_name = "__reduce__";
2789 break;
2790 }
2791 } else
2792 PyErr_Clear();
2793 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2794 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2795
2796#if defined(USE_PYEXPAT_CAPI)
2797 /* link against pyexpat, if possible */
2798 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2799 if (capi &&
2800 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2801 capi->size <= sizeof(*expat_capi) &&
2802 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2803 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2804 capi->MICRO_VERSION == XML_MICRO_VERSION)
2805 expat_capi = capi;
2806 else
2807 expat_capi = NULL;
2808#endif
2809
2810}