blob: b468e71b3d4cd8884aa054ea1001db19a5ce481f [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000036 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000038 *
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000039 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000041 *
42 * info@pythonware.com
43 * http://www.pythonware.com
44 */
45
Fredrik Lundh6d52b552005-12-16 22:06:43 +000046/* Licensed to PSF under a Contributor Agreement. */
47/* See http://www.python.org/2.4/license for licensing details. */
48
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000049#include "Python.h"
50
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000051#define VERSION "1.0.6-snapshot"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
53/* -------------------------------------------------------------------- */
54/* configuration */
55
56/* Leave defined to include the expat-based XMLParser type */
57#define USE_EXPAT
58
59/* Define to route all expat calls via pyexpat's embedded expat library */
60/* #define USE_PYEXPAT_CAPI */
61
62/* An element can hold this many children without extra memory
63 allocations. */
64#define STATIC_CHILDREN 4
65
66/* For best performance, choose a value so that 80-90% of all nodes
67 have no more than the given number of children. Set this to zero
68 to minimize the size of the element structure itself (this only
69 helps if you have lots of leaf nodes with attributes). */
70
71/* Also note that pymalloc always allocates blocks in multiples of
72 eight bytes. For the current version of cElementTree, this means
73 that the number of children should be an even number, at least on
74 32-bit platforms. */
75
76/* -------------------------------------------------------------------- */
77
/* Optional allocation tracing.  With the "#if 0" below switched on, a
   running byte counter is updated and printed by every ALLOC/RELEASE;
   in the default configuration both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)                            \
    do {                                                \
        memory += size;                                 \
        printf("%8d - %s\n", memory, comment);          \
    } while (0)
#define RELEASE(size, comment)                          \
    do {                                                \
        memory -= size;                                 \
        printf("%8d - %s\n", memory, comment);          \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
88
/* compiler tweaks: LOCAL(type) introduces a file-local function that
   returns `type`; on MSVC it is additionally marked inline/fastcall */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
95
/* compatibility macros: supply names that older Python headers lack */

/* before 2.5: no Py_ssize_t and no lenfunc */
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

/* before 2.4: no exact dictionary check */
#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check
#endif

/* before 2.2: no exact list/string checks */
#if (PY_VERSION_HEX < 0x02020000)
#define PyList_CheckExact PyList_Check
#define PyString_CheckExact PyString_Check
#endif

/* 1.6 up to (but not including) 2.2 */
#if (PY_VERSION_HEX >= 0x01060000) && (PY_VERSION_HEX < 0x02020000)
#define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
#endif

#ifndef Py_RETURN_NONE
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
116
/* Tagged-pointer helpers for the text and tail slots.  The lowest bit
   of the stored pointer carries the 'join' flag and the remaining bits
   are the object pointer, so every use of text/tail as an object must
   first go through JOIN_OBJ.  See the comments in the ElementObject
   definition for what the flag means. */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t) 1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) \
    ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
124
125/* glue functions (see the init function for details) */
126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
128static PyObject* elementtree_getiterator_obj;
129static PyObject* elementpath_obj;
130
131/* helpers */
132
133LOCAL(PyObject*)
134deepcopy(PyObject* object, PyObject* memo)
135{
136 /* do a deep copy of the given object */
137
138 PyObject* args;
139 PyObject* result;
140
141 if (!elementtree_deepcopy_obj) {
142 PyErr_SetString(
143 PyExc_RuntimeError,
144 "deepcopy helper not found"
145 );
146 return NULL;
147 }
148
149 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000150 if (!args)
151 return NULL;
152
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000153 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
154 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
155
156 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
157
158 Py_DECREF(args);
159
160 return result;
161}
162
163LOCAL(PyObject*)
164list_join(PyObject* list)
165{
166 /* join list elements (destroying the list in the process) */
167
168 PyObject* joiner;
169 PyObject* function;
170 PyObject* args;
171 PyObject* result;
172
173 switch (PyList_GET_SIZE(list)) {
174 case 0:
175 Py_DECREF(list);
176 return PyString_FromString("");
177 case 1:
178 result = PyList_GET_ITEM(list, 0);
179 Py_INCREF(result);
180 Py_DECREF(list);
181 return result;
182 }
183
184 /* two or more elements: slice out a suitable separator from the
185 first member, and use that to join the entire list */
186
187 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
188 if (!joiner)
189 return NULL;
190
191 function = PyObject_GetAttrString(joiner, "join");
192 if (!function) {
193 Py_DECREF(joiner);
194 return NULL;
195 }
196
197 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000198 if (!args)
199 return NULL;
200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000201 PyTuple_SET_ITEM(args, 0, list);
202
203 result = PyObject_CallObject(function, args);
204
205 Py_DECREF(args); /* also removes list */
206 Py_DECREF(function);
207 Py_DECREF(joiner);
208
209 return result;
210}
211
212#if (PY_VERSION_HEX < 0x02020000)
213LOCAL(int)
214PyDict_Update(PyObject* dict, PyObject* other)
215{
216 /* PyDict_Update emulation for 2.1 and earlier */
217
218 PyObject* res;
219
220 res = PyObject_CallMethod(dict, "update", "O", other);
221 if (!res)
222 return -1;
223
224 Py_DECREF(res);
225 return 0;
226}
227#endif
228
229/* -------------------------------------------------------------------- */
230/* the element type */
231
232typedef struct {
233
234 /* attributes (a dictionary object), or None if no attributes */
235 PyObject* attrib;
236
237 /* child elements */
238 int length; /* actual number of items */
239 int allocated; /* allocated items */
240
241 /* this either points to _children or to a malloced buffer */
242 PyObject* *children;
243
244 PyObject* _children[STATIC_CHILDREN];
245
246} ElementObjectExtra;
247
248typedef struct {
249 PyObject_HEAD
250
251 /* element tag (a string). */
252 PyObject* tag;
253
254 /* text before first child. note that this is a tagged pointer;
255 use JOIN_OBJ to get the object pointer. the join flag is used
256 to distinguish lists created by the tree builder from lists
257 assigned to the attribute by application code; the former
258 should be joined before being returned to the user, the latter
259 should be left intact. */
260 PyObject* text;
261
262 /* text after this element, in parent. note that this is a tagged
263 pointer; use JOIN_OBJ to get the object pointer. */
264 PyObject* tail;
265
266 ElementObjectExtra* extra;
267
268} ElementObject;
269
270staticforward PyTypeObject Element_Type;
271
272#define Element_CheckExact(op) ((op)->ob_type == &Element_Type)
273
274/* -------------------------------------------------------------------- */
275/* element constructor and destructor */
276
277LOCAL(int)
278element_new_extra(ElementObject* self, PyObject* attrib)
279{
280 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
281 if (!self->extra)
282 return -1;
283
284 if (!attrib)
285 attrib = Py_None;
286
287 Py_INCREF(attrib);
288 self->extra->attrib = attrib;
289
290 self->extra->length = 0;
291 self->extra->allocated = STATIC_CHILDREN;
292 self->extra->children = self->extra->_children;
293
294 return 0;
295}
296
297LOCAL(void)
298element_dealloc_extra(ElementObject* self)
299{
300 int i;
301
302 Py_DECREF(self->extra->attrib);
303
304 for (i = 0; i < self->extra->length; i++)
305 Py_DECREF(self->extra->children[i]);
306
307 if (self->extra->children != self->extra->_children)
308 PyObject_Free(self->extra->children);
309
310 PyObject_Free(self->extra);
311}
312
313LOCAL(PyObject*)
314element_new(PyObject* tag, PyObject* attrib)
315{
316 ElementObject* self;
317
318 self = PyObject_New(ElementObject, &Element_Type);
319 if (self == NULL)
320 return NULL;
321
322 /* use None for empty dictionaries */
323 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
324 attrib = Py_None;
325
326 self->extra = NULL;
327
328 if (attrib != Py_None) {
329
Neal Norwitzc6a989a2006-05-10 06:57:58 +0000330 if (element_new_extra(self, attrib) < 0) {
331 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000332 return NULL;
Neal Norwitzc6a989a2006-05-10 06:57:58 +0000333 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000334
335 self->extra->length = 0;
336 self->extra->allocated = STATIC_CHILDREN;
337 self->extra->children = self->extra->_children;
338
339 }
340
341 Py_INCREF(tag);
342 self->tag = tag;
343
344 Py_INCREF(Py_None);
345 self->text = Py_None;
346
347 Py_INCREF(Py_None);
348 self->tail = Py_None;
349
350 ALLOC(sizeof(ElementObject), "create element");
351
352 return (PyObject*) self;
353}
354
355LOCAL(int)
356element_resize(ElementObject* self, int extra)
357{
358 int size;
359 PyObject* *children;
360
361 /* make sure self->children can hold the given number of extra
362 elements. set an exception and return -1 if allocation failed */
363
364 if (!self->extra)
365 element_new_extra(self, NULL);
366
367 size = self->extra->length + extra;
368
369 if (size > self->extra->allocated) {
370 /* use Python 2.4's list growth strategy */
371 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
372 if (self->extra->children != self->extra->_children) {
373 children = PyObject_Realloc(self->extra->children,
374 size * sizeof(PyObject*));
375 if (!children)
376 goto nomemory;
377 } else {
378 children = PyObject_Malloc(size * sizeof(PyObject*));
379 if (!children)
380 goto nomemory;
381 /* copy existing children from static area to malloc buffer */
382 memcpy(children, self->extra->children,
383 self->extra->length * sizeof(PyObject*));
384 }
385 self->extra->children = children;
386 self->extra->allocated = size;
387 }
388
389 return 0;
390
391 nomemory:
392 PyErr_NoMemory();
393 return -1;
394}
395
396LOCAL(int)
397element_add_subelement(ElementObject* self, PyObject* element)
398{
399 /* add a child element to a parent */
400
401 if (element_resize(self, 1) < 0)
402 return -1;
403
404 Py_INCREF(element);
405 self->extra->children[self->extra->length] = element;
406
407 self->extra->length++;
408
409 return 0;
410}
411
412LOCAL(PyObject*)
413element_get_attrib(ElementObject* self)
414{
415 /* return borrowed reference to attrib dictionary */
416 /* note: this function assumes that the extra section exists */
417
418 PyObject* res = self->extra->attrib;
419
420 if (res == Py_None) {
421 /* create missing dictionary */
422 res = PyDict_New();
423 if (!res)
424 return NULL;
425 self->extra->attrib = res;
426 }
427
428 return res;
429}
430
431LOCAL(PyObject*)
432element_get_text(ElementObject* self)
433{
434 /* return borrowed reference to text attribute */
435
436 PyObject* res = self->text;
437
438 if (JOIN_GET(res)) {
439 res = JOIN_OBJ(res);
440 if (PyList_CheckExact(res)) {
441 res = list_join(res);
442 if (!res)
443 return NULL;
444 self->text = res;
445 }
446 }
447
448 return res;
449}
450
451LOCAL(PyObject*)
452element_get_tail(ElementObject* self)
453{
454 /* return borrowed reference to text attribute */
455
456 PyObject* res = self->tail;
457
458 if (JOIN_GET(res)) {
459 res = JOIN_OBJ(res);
460 if (PyList_CheckExact(res)) {
461 res = list_join(res);
462 if (!res)
463 return NULL;
464 self->tail = res;
465 }
466 }
467
468 return res;
469}
470
471static PyObject*
472element(PyObject* self, PyObject* args, PyObject* kw)
473{
474 PyObject* elem;
475
476 PyObject* tag;
477 PyObject* attrib = NULL;
478 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
479 &PyDict_Type, &attrib))
480 return NULL;
481
482 if (attrib || kw) {
483 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
484 if (!attrib)
485 return NULL;
486 if (kw)
487 PyDict_Update(attrib, kw);
488 } else {
489 Py_INCREF(Py_None);
490 attrib = Py_None;
491 }
492
493 elem = element_new(tag, attrib);
494
495 Py_DECREF(attrib);
496
497 return elem;
498}
499
500static PyObject*
501subelement(PyObject* self, PyObject* args, PyObject* kw)
502{
503 PyObject* elem;
504
505 ElementObject* parent;
506 PyObject* tag;
507 PyObject* attrib = NULL;
508 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
509 &Element_Type, &parent, &tag,
510 &PyDict_Type, &attrib))
511 return NULL;
512
513 if (attrib || kw) {
514 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
515 if (!attrib)
516 return NULL;
517 if (kw)
518 PyDict_Update(attrib, kw);
519 } else {
520 Py_INCREF(Py_None);
521 attrib = Py_None;
522 }
523
524 elem = element_new(tag, attrib);
525
526 Py_DECREF(attrib);
527
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000528 if (element_add_subelement(parent, elem) < 0) {
529 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000530 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000531 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000532
533 return elem;
534}
535
536static void
537element_dealloc(ElementObject* self)
538{
539 if (self->extra)
540 element_dealloc_extra(self);
541
542 /* discard attributes */
543 Py_DECREF(self->tag);
544 Py_DECREF(JOIN_OBJ(self->text));
545 Py_DECREF(JOIN_OBJ(self->tail));
546
547 RELEASE(sizeof(ElementObject), "destroy element");
548
549 PyObject_Del(self);
550}
551
552/* -------------------------------------------------------------------- */
553/* methods (in alphabetical order) */
554
555static PyObject*
556element_append(ElementObject* self, PyObject* args)
557{
558 PyObject* element;
559 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
560 return NULL;
561
562 if (element_add_subelement(self, element) < 0)
563 return NULL;
564
565 Py_RETURN_NONE;
566}
567
568static PyObject*
569element_clear(ElementObject* self, PyObject* args)
570{
571 if (!PyArg_ParseTuple(args, ":clear"))
572 return NULL;
573
574 if (self->extra) {
575 element_dealloc_extra(self);
576 self->extra = NULL;
577 }
578
579 Py_INCREF(Py_None);
580 Py_DECREF(JOIN_OBJ(self->text));
581 self->text = Py_None;
582
583 Py_INCREF(Py_None);
584 Py_DECREF(JOIN_OBJ(self->tail));
585 self->tail = Py_None;
586
587 Py_RETURN_NONE;
588}
589
590static PyObject*
591element_copy(ElementObject* self, PyObject* args)
592{
593 int i;
594 ElementObject* element;
595
596 if (!PyArg_ParseTuple(args, ":__copy__"))
597 return NULL;
598
599 element = (ElementObject*) element_new(
600 self->tag, (self->extra) ? self->extra->attrib : Py_None
601 );
602 if (!element)
603 return NULL;
604
605 Py_DECREF(JOIN_OBJ(element->text));
606 element->text = self->text;
607 Py_INCREF(JOIN_OBJ(element->text));
608
609 Py_DECREF(JOIN_OBJ(element->tail));
610 element->tail = self->tail;
611 Py_INCREF(JOIN_OBJ(element->tail));
612
613 if (self->extra) {
614
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000615 if (element_resize(element, self->extra->length) < 0) {
616 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000617 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000618 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000619
620 for (i = 0; i < self->extra->length; i++) {
621 Py_INCREF(self->extra->children[i]);
622 element->extra->children[i] = self->extra->children[i];
623 }
624
625 element->extra->length = self->extra->length;
626
627 }
628
629 return (PyObject*) element;
630}
631
632static PyObject*
633element_deepcopy(ElementObject* self, PyObject* args)
634{
635 int i;
636 ElementObject* element;
637 PyObject* tag;
638 PyObject* attrib;
639 PyObject* text;
640 PyObject* tail;
641 PyObject* id;
642
643 PyObject* memo;
644 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
645 return NULL;
646
647 tag = deepcopy(self->tag, memo);
648 if (!tag)
649 return NULL;
650
651 if (self->extra) {
652 attrib = deepcopy(self->extra->attrib, memo);
653 if (!attrib) {
654 Py_DECREF(tag);
655 return NULL;
656 }
657 } else {
658 Py_INCREF(Py_None);
659 attrib = Py_None;
660 }
661
662 element = (ElementObject*) element_new(tag, attrib);
663
664 Py_DECREF(tag);
665 Py_DECREF(attrib);
666
667 if (!element)
668 return NULL;
669
670 text = deepcopy(JOIN_OBJ(self->text), memo);
671 if (!text)
672 goto error;
673 Py_DECREF(element->text);
674 element->text = JOIN_SET(text, JOIN_GET(self->text));
675
676 tail = deepcopy(JOIN_OBJ(self->tail), memo);
677 if (!tail)
678 goto error;
679 Py_DECREF(element->tail);
680 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
681
682 if (self->extra) {
683
684 if (element_resize(element, self->extra->length) < 0)
685 goto error;
686
687 for (i = 0; i < self->extra->length; i++) {
688 PyObject* child = deepcopy(self->extra->children[i], memo);
689 if (!child) {
690 element->extra->length = i;
691 goto error;
692 }
693 element->extra->children[i] = child;
694 }
695
696 element->extra->length = self->extra->length;
697
698 }
699
700 /* add object to memo dictionary (so deepcopy won't visit it again) */
701 id = PyInt_FromLong((Py_uintptr_t) self);
702
703 i = PyDict_SetItem(memo, id, (PyObject*) element);
704
705 Py_DECREF(id);
706
707 if (i < 0)
708 goto error;
709
710 return (PyObject*) element;
711
712 error:
713 Py_DECREF(element);
714 return NULL;
715}
716
717LOCAL(int)
718checkpath(PyObject* tag)
719{
Neal Norwitzc7074382006-06-12 02:06:17 +0000720 Py_ssize_t i;
721 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722
723 /* check if a tag contains an xpath character */
724
725#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
726
727#if defined(Py_USING_UNICODE)
728 if (PyUnicode_Check(tag)) {
729 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
730 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
731 if (p[i] == '{')
732 check = 0;
733 else if (p[i] == '}')
734 check = 1;
735 else if (check && PATHCHAR(p[i]))
736 return 1;
737 }
738 return 0;
739 }
740#endif
741 if (PyString_Check(tag)) {
742 char *p = PyString_AS_STRING(tag);
743 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
744 if (p[i] == '{')
745 check = 0;
746 else if (p[i] == '}')
747 check = 1;
748 else if (check && PATHCHAR(p[i]))
749 return 1;
750 }
751 return 0;
752 }
753
754 return 1; /* unknown type; might be path expression */
755}
756
757static PyObject*
758element_find(ElementObject* self, PyObject* args)
759{
760 int i;
761
762 PyObject* tag;
763 if (!PyArg_ParseTuple(args, "O:find", &tag))
764 return NULL;
765
766 if (checkpath(tag))
767 return PyObject_CallMethod(
768 elementpath_obj, "find", "OO", self, tag
769 );
770
771 if (!self->extra)
772 Py_RETURN_NONE;
773
774 for (i = 0; i < self->extra->length; i++) {
775 PyObject* item = self->extra->children[i];
776 if (Element_CheckExact(item) &&
777 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
778 Py_INCREF(item);
779 return item;
780 }
781 }
782
783 Py_RETURN_NONE;
784}
785
786static PyObject*
787element_findtext(ElementObject* self, PyObject* args)
788{
789 int i;
790
791 PyObject* tag;
792 PyObject* default_value = Py_None;
793 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
794 return NULL;
795
796 if (checkpath(tag))
797 return PyObject_CallMethod(
798 elementpath_obj, "findtext", "OOO", self, tag, default_value
799 );
800
801 if (!self->extra) {
802 Py_INCREF(default_value);
803 return default_value;
804 }
805
806 for (i = 0; i < self->extra->length; i++) {
807 ElementObject* item = (ElementObject*) self->extra->children[i];
808 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
809 PyObject* text = element_get_text(item);
810 if (text == Py_None)
811 return PyString_FromString("");
812 Py_INCREF(text);
813 return text;
814 }
815 }
816
817 Py_INCREF(default_value);
818 return default_value;
819}
820
821static PyObject*
822element_findall(ElementObject* self, PyObject* args)
823{
824 int i;
825 PyObject* out;
826
827 PyObject* tag;
828 if (!PyArg_ParseTuple(args, "O:findall", &tag))
829 return NULL;
830
831 if (checkpath(tag))
832 return PyObject_CallMethod(
833 elementpath_obj, "findall", "OO", self, tag
834 );
835
836 out = PyList_New(0);
837 if (!out)
838 return NULL;
839
840 if (!self->extra)
841 return out;
842
843 for (i = 0; i < self->extra->length; i++) {
844 PyObject* item = self->extra->children[i];
845 if (Element_CheckExact(item) &&
846 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
847 if (PyList_Append(out, item) < 0) {
848 Py_DECREF(out);
849 return NULL;
850 }
851 }
852 }
853
854 return out;
855}
856
857static PyObject*
858element_get(ElementObject* self, PyObject* args)
859{
860 PyObject* value;
861
862 PyObject* key;
863 PyObject* default_value = Py_None;
864 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
865 return NULL;
866
867 if (!self->extra || self->extra->attrib == Py_None)
868 value = default_value;
869 else {
870 value = PyDict_GetItem(self->extra->attrib, key);
871 if (!value)
872 value = default_value;
873 }
874
875 Py_INCREF(value);
876 return value;
877}
878
879static PyObject*
880element_getchildren(ElementObject* self, PyObject* args)
881{
882 int i;
883 PyObject* list;
884
885 if (!PyArg_ParseTuple(args, ":getchildren"))
886 return NULL;
887
888 if (!self->extra)
889 return PyList_New(0);
890
891 list = PyList_New(self->extra->length);
892 if (!list)
893 return NULL;
894
895 for (i = 0; i < self->extra->length; i++) {
896 PyObject* item = self->extra->children[i];
897 Py_INCREF(item);
898 PyList_SET_ITEM(list, i, item);
899 }
900
901 return list;
902}
903
904static PyObject*
905element_getiterator(ElementObject* self, PyObject* args)
906{
907 PyObject* result;
908
909 PyObject* tag = Py_None;
910 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
911 return NULL;
912
913 if (!elementtree_getiterator_obj) {
914 PyErr_SetString(
915 PyExc_RuntimeError,
916 "getiterator helper not found"
917 );
918 return NULL;
919 }
920
921 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000922 if (!args)
923 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000924
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000925 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
926 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
927
928 result = PyObject_CallObject(elementtree_getiterator_obj, args);
929
930 Py_DECREF(args);
931
932 return result;
933}
934
935static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000936element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000937{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000938 ElementObject* self = (ElementObject*) self_;
939
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000940 if (!self->extra || index < 0 || index >= self->extra->length) {
941 PyErr_SetString(
942 PyExc_IndexError,
943 "child index out of range"
944 );
945 return NULL;
946 }
947
948 Py_INCREF(self->extra->children[index]);
949 return self->extra->children[index];
950}
951
952static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000953element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000954{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000955 ElementObject* self = (ElementObject*) self_;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000956 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000957 PyObject* list;
958
959 if (!self->extra)
960 return PyList_New(0);
961
962 /* standard clamping */
963 if (start < 0)
964 start = 0;
965 if (end < 0)
966 end = 0;
967 if (end > self->extra->length)
968 end = self->extra->length;
969 if (start > end)
970 start = end;
971
972 list = PyList_New(end - start);
973 if (!list)
974 return NULL;
975
976 for (i = start; i < end; i++) {
977 PyObject* item = self->extra->children[i];
978 Py_INCREF(item);
979 PyList_SET_ITEM(list, i - start, item);
980 }
981
982 return list;
983}
984
985static PyObject*
986element_insert(ElementObject* self, PyObject* args)
987{
988 int i;
989
990 int index;
991 PyObject* element;
992 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
993 &Element_Type, &element))
994 return NULL;
995
996 if (!self->extra)
997 element_new_extra(self, NULL);
998
999 if (index < 0)
1000 index = 0;
1001 if (index > self->extra->length)
1002 index = self->extra->length;
1003
1004 if (element_resize(self, 1) < 0)
1005 return NULL;
1006
1007 for (i = self->extra->length; i > index; i--)
1008 self->extra->children[i] = self->extra->children[i-1];
1009
1010 Py_INCREF(element);
1011 self->extra->children[index] = element;
1012
1013 self->extra->length++;
1014
1015 Py_RETURN_NONE;
1016}
1017
1018static PyObject*
1019element_items(ElementObject* self, PyObject* args)
1020{
1021 if (!PyArg_ParseTuple(args, ":items"))
1022 return NULL;
1023
1024 if (!self->extra || self->extra->attrib == Py_None)
1025 return PyList_New(0);
1026
1027 return PyDict_Items(self->extra->attrib);
1028}
1029
1030static PyObject*
1031element_keys(ElementObject* self, PyObject* args)
1032{
1033 if (!PyArg_ParseTuple(args, ":keys"))
1034 return NULL;
1035
1036 if (!self->extra || self->extra->attrib == Py_None)
1037 return PyList_New(0);
1038
1039 return PyDict_Keys(self->extra->attrib);
1040}
1041
Martin v. Löwis18e16552006-02-15 17:27:45 +00001042static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001043element_length(ElementObject* self)
1044{
1045 if (!self->extra)
1046 return 0;
1047
1048 return self->extra->length;
1049}
1050
1051static PyObject*
1052element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1053{
1054 PyObject* elem;
1055
1056 PyObject* tag;
1057 PyObject* attrib;
1058 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1059 return NULL;
1060
1061 attrib = PyDict_Copy(attrib);
1062 if (!attrib)
1063 return NULL;
1064
1065 elem = element_new(tag, attrib);
1066
1067 Py_DECREF(attrib);
1068
1069 return elem;
1070}
1071
1072static PyObject*
1073element_reduce(ElementObject* self, PyObject* args)
1074{
1075 if (!PyArg_ParseTuple(args, ":__reduce__"))
1076 return NULL;
1077
1078 /* Hack alert: This method is used to work around a __copy__
1079 problem on certain 2.3 and 2.4 versions. To save time and
1080 simplify the code, we create the copy in here, and use a dummy
1081 copyelement helper to trick the copy module into doing the
1082 right thing. */
1083
1084 if (!elementtree_copyelement_obj) {
1085 PyErr_SetString(
1086 PyExc_RuntimeError,
1087 "copyelement helper not found"
1088 );
1089 return NULL;
1090 }
1091
1092 return Py_BuildValue(
1093 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1094 );
1095}
1096
1097static PyObject*
1098element_remove(ElementObject* self, PyObject* args)
1099{
1100 int i;
1101
1102 PyObject* element;
1103 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1104 return NULL;
1105
1106 if (!self->extra) {
1107 /* element has no children, so raise exception */
1108 PyErr_SetString(
1109 PyExc_ValueError,
1110 "list.remove(x): x not in list"
1111 );
1112 return NULL;
1113 }
1114
1115 for (i = 0; i < self->extra->length; i++) {
1116 if (self->extra->children[i] == element)
1117 break;
1118 if (PyObject_Compare(self->extra->children[i], element) == 0)
1119 break;
1120 }
1121
1122 if (i == self->extra->length) {
1123 /* element is not in children, so raise exception */
1124 PyErr_SetString(
1125 PyExc_ValueError,
1126 "list.remove(x): x not in list"
1127 );
1128 return NULL;
1129 }
1130
1131 Py_DECREF(self->extra->children[i]);
1132
1133 self->extra->length--;
1134
1135 for (; i < self->extra->length; i++)
1136 self->extra->children[i] = self->extra->children[i+1];
1137
1138 Py_RETURN_NONE;
1139}
1140
1141static PyObject*
1142element_repr(ElementObject* self)
1143{
1144 PyObject* repr;
1145 char buffer[100];
1146
1147 repr = PyString_FromString("<Element ");
1148
1149 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag));
1150
1151 sprintf(buffer, " at %p>", self);
1152 PyString_ConcatAndDel(&repr, PyString_FromString(buffer));
1153
1154 return repr;
1155}
1156
1157static PyObject*
1158element_set(ElementObject* self, PyObject* args)
1159{
1160 PyObject* attrib;
1161
1162 PyObject* key;
1163 PyObject* value;
1164 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1165 return NULL;
1166
1167 if (!self->extra)
1168 element_new_extra(self, NULL);
1169
1170 attrib = element_get_attrib(self);
1171 if (!attrib)
1172 return NULL;
1173
1174 if (PyDict_SetItem(attrib, key, value) < 0)
1175 return NULL;
1176
1177 Py_RETURN_NONE;
1178}
1179
1180static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001181element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001183 ElementObject* self = (ElementObject*) self_;
Neal Norwitzc7074382006-06-12 02:06:17 +00001184 Py_ssize_t i, new, old;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001185 PyObject* recycle = NULL;
1186
1187 if (!self->extra)
1188 element_new_extra(self, NULL);
1189
1190 /* standard clamping */
1191 if (start < 0)
1192 start = 0;
1193 if (end < 0)
1194 end = 0;
1195 if (end > self->extra->length)
1196 end = self->extra->length;
1197 if (start > end)
1198 start = end;
1199
1200 old = end - start;
1201
1202 if (item == NULL)
1203 new = 0;
1204 else if (PyList_CheckExact(item)) {
1205 new = PyList_GET_SIZE(item);
1206 } else {
1207 /* FIXME: support arbitrary sequences? */
1208 PyErr_Format(
1209 PyExc_TypeError,
1210 "expected list, not \"%.200s\"", item->ob_type->tp_name
1211 );
1212 return -1;
1213 }
1214
1215 if (old > 0) {
1216 /* to avoid recursive calls to this method (via decref), move
1217 old items to the recycle bin here, and get rid of them when
1218 we're done modifying the element */
1219 recycle = PyList_New(old);
1220 for (i = 0; i < old; i++)
1221 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1222 }
1223
1224 if (new < old) {
1225 /* delete slice */
1226 for (i = end; i < self->extra->length; i++)
1227 self->extra->children[i + new - old] = self->extra->children[i];
1228 } else if (new > old) {
1229 /* insert slice */
1230 if (element_resize(self, new - old) < 0)
1231 return -1;
1232 for (i = self->extra->length-1; i >= end; i--)
1233 self->extra->children[i + new - old] = self->extra->children[i];
1234 }
1235
1236 /* replace the slice */
1237 for (i = 0; i < new; i++) {
1238 PyObject* element = PyList_GET_ITEM(item, i);
1239 Py_INCREF(element);
1240 self->extra->children[i + start] = element;
1241 }
1242
1243 self->extra->length += new - old;
1244
1245 /* discard the recycle bin, and everything in it */
1246 Py_XDECREF(recycle);
1247
1248 return 0;
1249}
1250
1251static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001252element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001253{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001254 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001255 int i;
1256 PyObject* old;
1257
1258 if (!self->extra || index < 0 || index >= self->extra->length) {
1259 PyErr_SetString(
1260 PyExc_IndexError,
1261 "child assignment index out of range");
1262 return -1;
1263 }
1264
1265 old = self->extra->children[index];
1266
1267 if (item) {
1268 Py_INCREF(item);
1269 self->extra->children[index] = item;
1270 } else {
1271 self->extra->length--;
1272 for (i = index; i < self->extra->length; i++)
1273 self->extra->children[i] = self->extra->children[i+1];
1274 }
1275
1276 Py_DECREF(old);
1277
1278 return 0;
1279}
1280
1281static PyMethodDef element_methods[] = {
1282
1283 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1284
1285 {"get", (PyCFunction) element_get, METH_VARARGS},
1286 {"set", (PyCFunction) element_set, METH_VARARGS},
1287
1288 {"find", (PyCFunction) element_find, METH_VARARGS},
1289 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1290 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1291
1292 {"append", (PyCFunction) element_append, METH_VARARGS},
1293 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1294 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1295
1296 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1297 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1298
1299 {"items", (PyCFunction) element_items, METH_VARARGS},
1300 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1301
1302 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1303
1304 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1305 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1306
1307 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1308 C objects correctly, so we have to fake it using a __reduce__-
1309 based hack (see the element_reduce implementation above for
1310 details). */
1311
1312 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1313 using a runtime test to figure out if we need to fake things
1314 or now (see the init code below). The following entry is
1315 enabled only if the hack is needed. */
1316
1317 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1318
1319 {NULL, NULL}
1320};
1321
1322static PyObject*
1323element_getattr(ElementObject* self, char* name)
1324{
1325 PyObject* res;
1326
1327 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1328 if (res)
1329 return res;
1330
1331 PyErr_Clear();
1332
1333 if (strcmp(name, "tag") == 0)
1334 res = self->tag;
1335 else if (strcmp(name, "text") == 0)
1336 res = element_get_text(self);
1337 else if (strcmp(name, "tail") == 0) {
1338 res = element_get_tail(self);
1339 } else if (strcmp(name, "attrib") == 0) {
1340 if (!self->extra)
1341 element_new_extra(self, NULL);
1342 res = element_get_attrib(self);
1343 } else {
1344 PyErr_SetString(PyExc_AttributeError, name);
1345 return NULL;
1346 }
1347
1348 if (!res)
1349 return NULL;
1350
1351 Py_INCREF(res);
1352 return res;
1353}
1354
1355static int
1356element_setattr(ElementObject* self, const char* name, PyObject* value)
1357{
1358 if (value == NULL) {
1359 PyErr_SetString(
1360 PyExc_AttributeError,
1361 "can't delete element attributes"
1362 );
1363 return -1;
1364 }
1365
1366 if (strcmp(name, "tag") == 0) {
1367 Py_DECREF(self->tag);
1368 self->tag = value;
1369 Py_INCREF(self->tag);
1370 } else if (strcmp(name, "text") == 0) {
1371 Py_DECREF(JOIN_OBJ(self->text));
1372 self->text = value;
1373 Py_INCREF(self->text);
1374 } else if (strcmp(name, "tail") == 0) {
1375 Py_DECREF(JOIN_OBJ(self->tail));
1376 self->tail = value;
1377 Py_INCREF(self->tail);
1378 } else if (strcmp(name, "attrib") == 0) {
1379 if (!self->extra)
1380 element_new_extra(self, NULL);
1381 Py_DECREF(self->extra->attrib);
1382 self->extra->attrib = value;
1383 Py_INCREF(self->extra->attrib);
1384 } else {
1385 PyErr_SetString(PyExc_AttributeError, name);
1386 return -1;
1387 }
1388
1389 return 0;
1390}
1391
1392static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001393 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001394 0, /* sq_concat */
1395 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001396 element_getitem,
1397 element_getslice,
1398 element_setitem,
1399 element_setslice,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001400};
1401
1402statichere PyTypeObject Element_Type = {
1403 PyObject_HEAD_INIT(NULL)
1404 0, "Element", sizeof(ElementObject), 0,
1405 /* methods */
1406 (destructor)element_dealloc, /* tp_dealloc */
1407 0, /* tp_print */
1408 (getattrfunc)element_getattr, /* tp_getattr */
1409 (setattrfunc)element_setattr, /* tp_setattr */
1410 0, /* tp_compare */
1411 (reprfunc)element_repr, /* tp_repr */
1412 0, /* tp_as_number */
1413 &element_as_sequence, /* tp_as_sequence */
1414};
1415
1416/* ==================================================================== */
1417/* the tree builder type */
1418
1419typedef struct {
1420 PyObject_HEAD
1421
1422 PyObject* root; /* root node (first created node) */
1423
1424 ElementObject* this; /* current node */
1425 ElementObject* last; /* most recently created node */
1426
1427 PyObject* data; /* data collector (string or list), or NULL */
1428
1429 PyObject* stack; /* element stack */
Neal Norwitzc7074382006-06-12 02:06:17 +00001430 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001431
1432 /* element tracing */
1433 PyObject* events; /* list of events, or NULL if not collecting */
1434 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1435 PyObject* end_event_obj;
1436 PyObject* start_ns_event_obj;
1437 PyObject* end_ns_event_obj;
1438
1439} TreeBuilderObject;
1440
1441staticforward PyTypeObject TreeBuilder_Type;
1442
1443#define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type)
1444
1445/* -------------------------------------------------------------------- */
1446/* constructor and destructor */
1447
1448LOCAL(PyObject*)
1449treebuilder_new(void)
1450{
1451 TreeBuilderObject* self;
1452
1453 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1454 if (self == NULL)
1455 return NULL;
1456
1457 self->root = NULL;
1458
1459 Py_INCREF(Py_None);
1460 self->this = (ElementObject*) Py_None;
1461
1462 Py_INCREF(Py_None);
1463 self->last = (ElementObject*) Py_None;
1464
1465 self->data = NULL;
1466
1467 self->stack = PyList_New(20);
1468 self->index = 0;
1469
1470 self->events = NULL;
1471 self->start_event_obj = self->end_event_obj = NULL;
1472 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1473
1474 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1475
1476 return (PyObject*) self;
1477}
1478
1479static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00001480treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001481{
1482 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1483 return NULL;
1484
1485 return treebuilder_new();
1486}
1487
1488static void
1489treebuilder_dealloc(TreeBuilderObject* self)
1490{
1491 Py_XDECREF(self->end_ns_event_obj);
1492 Py_XDECREF(self->start_ns_event_obj);
1493 Py_XDECREF(self->end_event_obj);
1494 Py_XDECREF(self->start_event_obj);
1495 Py_XDECREF(self->events);
1496 Py_DECREF(self->stack);
1497 Py_XDECREF(self->data);
1498 Py_DECREF(self->last);
1499 Py_DECREF(self->this);
1500 Py_XDECREF(self->root);
1501
1502 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1503
1504 PyObject_Del(self);
1505}
1506
1507/* -------------------------------------------------------------------- */
1508/* handlers */
1509
1510LOCAL(PyObject*)
1511treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1512 PyObject* standalone)
1513{
1514 Py_RETURN_NONE;
1515}
1516
1517LOCAL(PyObject*)
1518treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1519 PyObject* attrib)
1520{
1521 PyObject* node;
1522 PyObject* this;
1523
1524 if (self->data) {
1525 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001526 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001527 self->last->text = JOIN_SET(
1528 self->data, PyList_CheckExact(self->data)
1529 );
1530 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001531 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001532 self->last->tail = JOIN_SET(
1533 self->data, PyList_CheckExact(self->data)
1534 );
1535 }
1536 self->data = NULL;
1537 }
1538
1539 node = element_new(tag, attrib);
1540 if (!node)
1541 return NULL;
1542
1543 this = (PyObject*) self->this;
1544
1545 if (this != Py_None) {
1546 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001547 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001548 } else {
1549 if (self->root) {
1550 PyErr_SetString(
1551 PyExc_SyntaxError,
1552 "multiple elements on top level"
1553 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001554 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001555 }
1556 Py_INCREF(node);
1557 self->root = node;
1558 }
1559
1560 if (self->index < PyList_GET_SIZE(self->stack)) {
1561 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001562 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001563 Py_INCREF(this);
1564 } else {
1565 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001566 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001567 }
1568 self->index++;
1569
1570 Py_DECREF(this);
1571 Py_INCREF(node);
1572 self->this = (ElementObject*) node;
1573
1574 Py_DECREF(self->last);
1575 Py_INCREF(node);
1576 self->last = (ElementObject*) node;
1577
1578 if (self->start_event_obj) {
1579 PyObject* res;
1580 PyObject* action = self->start_event_obj;
1581 res = PyTuple_New(2);
1582 if (res) {
1583 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1584 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1585 PyList_Append(self->events, res);
1586 Py_DECREF(res);
1587 } else
1588 PyErr_Clear(); /* FIXME: propagate error */
1589 }
1590
1591 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001592
1593 error:
1594 Py_DECREF(node);
1595 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001596}
1597
1598LOCAL(PyObject*)
1599treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1600{
1601 if (!self->data) {
1602 /* store the first item as is */
1603 Py_INCREF(data); self->data = data;
1604 } else {
1605 /* more than one item; use a list to collect items */
1606 if (PyString_CheckExact(self->data) && self->data->ob_refcnt == 1 &&
1607 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1608 /* expat often generates single character data sections; handle
1609 the most common case by resizing the existing string... */
Neal Norwitzc7074382006-06-12 02:06:17 +00001610 Py_ssize_t size = PyString_GET_SIZE(self->data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001611 if (_PyString_Resize(&self->data, size + 1) < 0)
1612 return NULL;
1613 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1614 } else if (PyList_CheckExact(self->data)) {
1615 if (PyList_Append(self->data, data) < 0)
1616 return NULL;
1617 } else {
1618 PyObject* list = PyList_New(2);
1619 if (!list)
1620 return NULL;
1621 PyList_SET_ITEM(list, 0, self->data);
1622 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1623 self->data = list;
1624 }
1625 }
1626
1627 Py_RETURN_NONE;
1628}
1629
1630LOCAL(PyObject*)
1631treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1632{
1633 PyObject* item;
1634
1635 if (self->data) {
1636 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001637 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001638 self->last->text = JOIN_SET(
1639 self->data, PyList_CheckExact(self->data)
1640 );
1641 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001642 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001643 self->last->tail = JOIN_SET(
1644 self->data, PyList_CheckExact(self->data)
1645 );
1646 }
1647 self->data = NULL;
1648 }
1649
1650 if (self->index == 0) {
1651 PyErr_SetString(
1652 PyExc_IndexError,
1653 "pop from empty stack"
1654 );
1655 return NULL;
1656 }
1657
1658 self->index--;
1659
1660 item = PyList_GET_ITEM(self->stack, self->index);
1661 Py_INCREF(item);
1662
1663 Py_DECREF(self->last);
1664
1665 self->last = (ElementObject*) self->this;
1666 self->this = (ElementObject*) item;
1667
1668 if (self->end_event_obj) {
1669 PyObject* res;
1670 PyObject* action = self->end_event_obj;
1671 PyObject* node = (PyObject*) self->last;
1672 res = PyTuple_New(2);
1673 if (res) {
1674 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1675 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1676 PyList_Append(self->events, res);
1677 Py_DECREF(res);
1678 } else
1679 PyErr_Clear(); /* FIXME: propagate error */
1680 }
1681
1682 Py_INCREF(self->last);
1683 return (PyObject*) self->last;
1684}
1685
1686LOCAL(void)
1687treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1688 const char* prefix, const char *uri)
1689{
1690 PyObject* res;
1691 PyObject* action;
1692 PyObject* parcel;
1693
1694 if (!self->events)
1695 return;
1696
1697 if (start) {
1698 if (!self->start_ns_event_obj)
1699 return;
1700 action = self->start_ns_event_obj;
1701 /* FIXME: prefix and uri use utf-8 encoding! */
1702 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1703 if (!parcel)
1704 return;
1705 Py_INCREF(action);
1706 } else {
1707 if (!self->end_ns_event_obj)
1708 return;
1709 action = self->end_ns_event_obj;
1710 Py_INCREF(action);
1711 parcel = Py_None;
1712 Py_INCREF(parcel);
1713 }
1714
1715 res = PyTuple_New(2);
1716
1717 if (res) {
1718 PyTuple_SET_ITEM(res, 0, action);
1719 PyTuple_SET_ITEM(res, 1, parcel);
1720 PyList_Append(self->events, res);
1721 Py_DECREF(res);
1722 } else
1723 PyErr_Clear(); /* FIXME: propagate error */
1724}
1725
1726/* -------------------------------------------------------------------- */
1727/* methods (in alphabetical order) */
1728
1729static PyObject*
1730treebuilder_data(TreeBuilderObject* self, PyObject* args)
1731{
1732 PyObject* data;
1733 if (!PyArg_ParseTuple(args, "O:data", &data))
1734 return NULL;
1735
1736 return treebuilder_handle_data(self, data);
1737}
1738
1739static PyObject*
1740treebuilder_end(TreeBuilderObject* self, PyObject* args)
1741{
1742 PyObject* tag;
1743 if (!PyArg_ParseTuple(args, "O:end", &tag))
1744 return NULL;
1745
1746 return treebuilder_handle_end(self, tag);
1747}
1748
1749LOCAL(PyObject*)
1750treebuilder_done(TreeBuilderObject* self)
1751{
1752 PyObject* res;
1753
1754 /* FIXME: check stack size? */
1755
1756 if (self->root)
1757 res = self->root;
1758 else
1759 res = Py_None;
1760
1761 Py_INCREF(res);
1762 return res;
1763}
1764
1765static PyObject*
1766treebuilder_close(TreeBuilderObject* self, PyObject* args)
1767{
1768 if (!PyArg_ParseTuple(args, ":close"))
1769 return NULL;
1770
1771 return treebuilder_done(self);
1772}
1773
1774static PyObject*
1775treebuilder_start(TreeBuilderObject* self, PyObject* args)
1776{
1777 PyObject* tag;
1778 PyObject* attrib = Py_None;
1779 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1780 return NULL;
1781
1782 return treebuilder_handle_start(self, tag, attrib);
1783}
1784
1785static PyObject*
1786treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1787{
1788 PyObject* encoding;
1789 PyObject* standalone;
1790 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1791 return NULL;
1792
1793 return treebuilder_handle_xml(self, encoding, standalone);
1794}
1795
1796static PyMethodDef treebuilder_methods[] = {
1797 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1798 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1799 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1800 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1801 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1802 {NULL, NULL}
1803};
1804
1805static PyObject*
1806treebuilder_getattr(TreeBuilderObject* self, char* name)
1807{
1808 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1809}
1810
1811statichere PyTypeObject TreeBuilder_Type = {
1812 PyObject_HEAD_INIT(NULL)
1813 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
1814 /* methods */
1815 (destructor)treebuilder_dealloc, /* tp_dealloc */
1816 0, /* tp_print */
1817 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1818};
1819
1820/* ==================================================================== */
1821/* the expat interface */
1822
1823#if defined(USE_EXPAT)
1824
1825#include "expat.h"
1826
1827#if defined(USE_PYEXPAT_CAPI)
1828#include "pyexpat.h"
1829static struct PyExpat_CAPI* expat_capi;
1830#define EXPAT(func) (expat_capi->func)
1831#else
1832#define EXPAT(func) (XML_##func)
1833#endif
1834
1835typedef struct {
1836 PyObject_HEAD
1837
1838 XML_Parser parser;
1839
1840 PyObject* target;
1841 PyObject* entity;
1842
1843 PyObject* names;
1844
1845 PyObject* handle_xml;
1846 PyObject* handle_start;
1847 PyObject* handle_data;
1848 PyObject* handle_end;
1849
1850 PyObject* handle_comment;
1851 PyObject* handle_pi;
1852
1853} XMLParserObject;
1854
1855staticforward PyTypeObject XMLParser_Type;
1856
1857/* helpers */
1858
#if defined(Py_USING_UNICODE)
LOCAL(int)
checkstring(const char* string, int size)
{
    /* returns 1 if any of the first size bytes has the high bit set
       (that is, if the 8-bit string contains UTF-8 encoded non-ascii
       characters), and 0 otherwise */

    const char* p = string;
    const char* end = string + size;

    while (p < end) {
        if (*p & 0x80)
            return 1;
        p++;
    }

    return 0;
}
#endif
1873
1874LOCAL(PyObject*)
1875makestring(const char* string, int size)
1876{
1877 /* convert a UTF-8 string to either a 7-bit ascii string or a
1878 Unicode string */
1879
1880#if defined(Py_USING_UNICODE)
1881 if (checkstring(string, size))
1882 return PyUnicode_DecodeUTF8(string, size, "strict");
1883#endif
1884
1885 return PyString_FromStringAndSize(string, size);
1886}
1887
1888LOCAL(PyObject*)
1889makeuniversal(XMLParserObject* self, const char* string)
1890{
1891 /* convert a UTF-8 tag/attribute name from the expat parser
1892 to a universal name string */
1893
1894 int size = strlen(string);
1895 PyObject* key;
1896 PyObject* value;
1897
1898 /* look the 'raw' name up in the names dictionary */
1899 key = PyString_FromStringAndSize(string, size);
1900 if (!key)
1901 return NULL;
1902
1903 value = PyDict_GetItem(self->names, key);
1904
1905 if (value) {
1906 Py_INCREF(value);
1907 } else {
1908 /* new name. convert to universal name, and decode as
1909 necessary */
1910
1911 PyObject* tag;
1912 char* p;
1913 int i;
1914
1915 /* look for namespace separator */
1916 for (i = 0; i < size; i++)
1917 if (string[i] == '}')
1918 break;
1919 if (i != size) {
1920 /* convert to universal name */
1921 tag = PyString_FromStringAndSize(NULL, size+1);
1922 p = PyString_AS_STRING(tag);
1923 p[0] = '{';
1924 memcpy(p+1, string, size);
1925 size++;
1926 } else {
1927 /* plain name; use key as tag */
1928 Py_INCREF(key);
1929 tag = key;
1930 }
1931
1932 /* decode universal name */
1933#if defined(Py_USING_UNICODE)
1934 /* inline makestring, to avoid duplicating the source string if
1935 it's not an utf-8 string */
1936 p = PyString_AS_STRING(tag);
1937 if (checkstring(p, size)) {
1938 value = PyUnicode_DecodeUTF8(p, size, "strict");
1939 Py_DECREF(tag);
1940 if (!value) {
1941 Py_DECREF(key);
1942 return NULL;
1943 }
1944 } else
1945#endif
1946 value = tag; /* use tag as is */
1947
1948 /* add to names dictionary */
1949 if (PyDict_SetItem(self->names, key, value) < 0) {
1950 Py_DECREF(key);
1951 Py_DECREF(value);
1952 return NULL;
1953 }
1954 }
1955
1956 Py_DECREF(key);
1957 return value;
1958}
1959
1960/* -------------------------------------------------------------------- */
1961/* handlers */
1962
1963static void
1964expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1965 int data_len)
1966{
1967 PyObject* key;
1968 PyObject* value;
1969 PyObject* res;
1970
1971 if (data_len < 2 || data_in[0] != '&')
1972 return;
1973
1974 key = makestring(data_in + 1, data_len - 2);
1975 if (!key)
1976 return;
1977
1978 value = PyDict_GetItem(self->entity, key);
1979
1980 if (value) {
1981 if (TreeBuilder_CheckExact(self->target))
1982 res = treebuilder_handle_data(
1983 (TreeBuilderObject*) self->target, value
1984 );
1985 else if (self->handle_data)
1986 res = PyObject_CallFunction(self->handle_data, "O", value);
1987 else
1988 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001989 Py_XDECREF(res);
1990 } else {
1991 PyErr_Format(
Trent Mickf08d6632006-06-19 23:21:25 +00001992 PyExc_SyntaxError, "undefined entity &%s;: line %ld, column %ld",
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001993 PyString_AS_STRING(key),
1994 EXPAT(GetErrorLineNumber)(self->parser),
1995 EXPAT(GetErrorColumnNumber)(self->parser)
1996 );
1997 }
1998
1999 Py_DECREF(key);
2000}
2001
2002static void
2003expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2004 const XML_Char **attrib_in)
2005{
2006 PyObject* res;
2007 PyObject* tag;
2008 PyObject* attrib;
2009 int ok;
2010
2011 /* tag name */
2012 tag = makeuniversal(self, tag_in);
2013 if (!tag)
2014 return; /* parser will look for errors */
2015
2016 /* attributes */
2017 if (attrib_in[0]) {
2018 attrib = PyDict_New();
2019 if (!attrib)
2020 return;
2021 while (attrib_in[0] && attrib_in[1]) {
2022 PyObject* key = makeuniversal(self, attrib_in[0]);
2023 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2024 if (!key || !value) {
2025 Py_XDECREF(value);
2026 Py_XDECREF(key);
2027 Py_DECREF(attrib);
2028 return;
2029 }
2030 ok = PyDict_SetItem(attrib, key, value);
2031 Py_DECREF(value);
2032 Py_DECREF(key);
2033 if (ok < 0) {
2034 Py_DECREF(attrib);
2035 return;
2036 }
2037 attrib_in += 2;
2038 }
2039 } else {
2040 Py_INCREF(Py_None);
2041 attrib = Py_None;
2042 }
2043
2044 if (TreeBuilder_CheckExact(self->target))
2045 /* shortcut */
2046 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2047 tag, attrib);
2048 else if (self->handle_start)
2049 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2050 else
2051 res = NULL;
2052
2053 Py_DECREF(tag);
2054 Py_DECREF(attrib);
2055
2056 Py_XDECREF(res);
2057}
2058
2059static void
2060expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2061 int data_len)
2062{
2063 PyObject* data;
2064 PyObject* res;
2065
2066 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002067 if (!data)
2068 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002069
2070 if (TreeBuilder_CheckExact(self->target))
2071 /* shortcut */
2072 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2073 else if (self->handle_data)
2074 res = PyObject_CallFunction(self->handle_data, "O", data);
2075 else
2076 res = NULL;
2077
2078 Py_DECREF(data);
2079
2080 Py_XDECREF(res);
2081}
2082
2083static void
2084expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2085{
2086 PyObject* tag;
2087 PyObject* res = NULL;
2088
2089 if (TreeBuilder_CheckExact(self->target))
2090 /* shortcut */
2091 /* the standard tree builder doesn't look at the end tag */
2092 res = treebuilder_handle_end(
2093 (TreeBuilderObject*) self->target, Py_None
2094 );
2095 else if (self->handle_end) {
2096 tag = makeuniversal(self, tag_in);
2097 if (tag) {
2098 res = PyObject_CallFunction(self->handle_end, "O", tag);
2099 Py_DECREF(tag);
2100 }
2101 }
2102
2103 Py_XDECREF(res);
2104}
2105
2106static void
2107expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2108 const XML_Char *uri)
2109{
2110 treebuilder_handle_namespace(
2111 (TreeBuilderObject*) self->target, 1, prefix, uri
2112 );
2113}
2114
2115static void
2116expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2117{
2118 treebuilder_handle_namespace(
2119 (TreeBuilderObject*) self->target, 0, NULL, NULL
2120 );
2121}
2122
2123static void
2124expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2125{
2126 PyObject* comment;
2127 PyObject* res;
2128
2129 if (self->handle_comment) {
2130 comment = makestring(comment_in, strlen(comment_in));
2131 if (comment) {
2132 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2133 Py_XDECREF(res);
2134 Py_DECREF(comment);
2135 }
2136 }
2137}
2138
2139static void
2140expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2141 const XML_Char* data_in)
2142{
2143 PyObject* target;
2144 PyObject* data;
2145 PyObject* res;
2146
2147 if (self->handle_pi) {
2148 target = makestring(target_in, strlen(target_in));
2149 data = makestring(data_in, strlen(data_in));
2150 if (target && data) {
2151 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2152 Py_XDECREF(res);
2153 Py_DECREF(data);
2154 Py_DECREF(target);
2155 } else {
2156 Py_XDECREF(data);
2157 Py_XDECREF(target);
2158 }
2159 }
2160}
2161
#if defined(Py_USING_UNICODE)
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    /* called by expat for encodings it doesn't know about.  builds a
       byte-to-character map by decoding all 256 byte values with the
       python codec of the same name.  bytes that the codec cannot
       decode are mapped to -1. */

    unsigned char bytes[256];
    PyObject* decoded;
    Py_UNICODE* chars;
    int ch;

    memset(info, 0, sizeof(XML_Encoding));

    for (ch = 0; ch < 256; ch++)
        bytes[ch] = ch;

    decoded = PyUnicode_Decode((char*) bytes, 256, name, "replace");
    if (!decoded)
        return XML_STATUS_ERROR;

    /* anything but one character per byte cannot be used as a map */
    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (ch = 0; ch < 256; ch++) {
        if (chars[ch] == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[ch] = -1;
        else
            info->map[ch] = chars[ch];
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2200
2201/* -------------------------------------------------------------------- */
2202/* constructor and destructor */
2203
2204static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00002205xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002206{
2207 XMLParserObject* self;
2208 /* FIXME: does this need to be static? */
2209 static XML_Memory_Handling_Suite memory_handler;
2210
2211 PyObject* target = NULL;
2212 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002213 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002214 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2215 &target, &encoding))
2216 return NULL;
2217
2218#if defined(USE_PYEXPAT_CAPI)
2219 if (!expat_capi) {
2220 PyErr_SetString(
2221 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2222 );
2223 return NULL;
2224 }
2225#endif
2226
2227 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2228 if (self == NULL)
2229 return NULL;
2230
2231 self->entity = PyDict_New();
2232 if (!self->entity) {
2233 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002234 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002235 }
2236
2237 self->names = PyDict_New();
2238 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002239 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002240 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002241 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002242 }
2243
2244 memory_handler.malloc_fcn = PyObject_Malloc;
2245 memory_handler.realloc_fcn = PyObject_Realloc;
2246 memory_handler.free_fcn = PyObject_Free;
2247
2248 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2249 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002250 PyObject_Del(self->names);
2251 PyObject_Del(self->entity);
2252 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002253 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002254 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002255 }
2256
2257 /* setup target handlers */
2258 if (!target) {
2259 target = treebuilder_new();
2260 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002261 EXPAT(ParserFree)(self->parser);
2262 PyObject_Del(self->names);
2263 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002264 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002265 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002266 }
2267 } else
2268 Py_INCREF(target);
2269 self->target = target;
2270
2271 self->handle_xml = PyObject_GetAttrString(target, "xml");
2272 self->handle_start = PyObject_GetAttrString(target, "start");
2273 self->handle_data = PyObject_GetAttrString(target, "data");
2274 self->handle_end = PyObject_GetAttrString(target, "end");
2275 self->handle_comment = PyObject_GetAttrString(target, "comment");
2276 self->handle_pi = PyObject_GetAttrString(target, "pi");
2277
2278 PyErr_Clear();
2279
2280 /* configure parser */
2281 EXPAT(SetUserData)(self->parser, self);
2282 EXPAT(SetElementHandler)(
2283 self->parser,
2284 (XML_StartElementHandler) expat_start_handler,
2285 (XML_EndElementHandler) expat_end_handler
2286 );
2287 EXPAT(SetDefaultHandlerExpand)(
2288 self->parser,
2289 (XML_DefaultHandler) expat_default_handler
2290 );
2291 EXPAT(SetCharacterDataHandler)(
2292 self->parser,
2293 (XML_CharacterDataHandler) expat_data_handler
2294 );
2295 if (self->handle_comment)
2296 EXPAT(SetCommentHandler)(
2297 self->parser,
2298 (XML_CommentHandler) expat_comment_handler
2299 );
2300 if (self->handle_pi)
2301 EXPAT(SetProcessingInstructionHandler)(
2302 self->parser,
2303 (XML_ProcessingInstructionHandler) expat_pi_handler
2304 );
2305#if defined(Py_USING_UNICODE)
2306 EXPAT(SetUnknownEncodingHandler)(
2307 self->parser,
2308 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2309 );
2310#endif
2311
2312 ALLOC(sizeof(XMLParserObject), "create expatparser");
2313
2314 return (PyObject*) self;
2315}
2316
2317static void
2318xmlparser_dealloc(XMLParserObject* self)
2319{
2320 EXPAT(ParserFree)(self->parser);
2321
2322 Py_XDECREF(self->handle_pi);
2323 Py_XDECREF(self->handle_comment);
2324 Py_XDECREF(self->handle_end);
2325 Py_XDECREF(self->handle_data);
2326 Py_XDECREF(self->handle_start);
2327 Py_XDECREF(self->handle_xml);
2328
2329 Py_DECREF(self->target);
2330 Py_DECREF(self->entity);
2331 Py_DECREF(self->names);
2332
2333 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2334
2335 PyObject_Del(self);
2336}
2337
2338/* -------------------------------------------------------------------- */
2339/* methods (in alphabetical order) */
2340
2341LOCAL(PyObject*)
2342expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2343{
2344 int ok;
2345
2346 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2347
2348 if (PyErr_Occurred())
2349 return NULL;
2350
2351 if (!ok) {
2352 PyErr_Format(
Trent Mickf08d6632006-06-19 23:21:25 +00002353 PyExc_SyntaxError, "%s: line %ld, column %ld",
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002354 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2355 EXPAT(GetErrorLineNumber)(self->parser),
2356 EXPAT(GetErrorColumnNumber)(self->parser)
2357 );
2358 return NULL;
2359 }
2360
2361 Py_RETURN_NONE;
2362}
2363
2364static PyObject*
2365xmlparser_close(XMLParserObject* self, PyObject* args)
2366{
2367 /* end feeding data to parser */
2368
2369 PyObject* res;
2370 if (!PyArg_ParseTuple(args, ":close"))
2371 return NULL;
2372
2373 res = expat_parse(self, "", 0, 1);
2374
2375 if (res && TreeBuilder_CheckExact(self->target)) {
2376 Py_DECREF(res);
2377 return treebuilder_done((TreeBuilderObject*) self->target);
2378 }
2379
2380 return res;
2381}
2382
2383static PyObject*
2384xmlparser_feed(XMLParserObject* self, PyObject* args)
2385{
2386 /* feed data to parser */
2387
2388 char* data;
2389 int data_len;
2390 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2391 return NULL;
2392
2393 return expat_parse(self, data, data_len, 0);
2394}
2395
2396static PyObject*
2397xmlparser_parse(XMLParserObject* self, PyObject* args)
2398{
2399 /* (internal) parse until end of input stream */
2400
2401 PyObject* reader;
2402 PyObject* buffer;
2403 PyObject* res;
2404
2405 PyObject* fileobj;
2406 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2407 return NULL;
2408
2409 reader = PyObject_GetAttrString(fileobj, "read");
2410 if (!reader)
2411 return NULL;
2412
2413 /* read from open file object */
2414 for (;;) {
2415
2416 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2417
2418 if (!buffer) {
2419 /* read failed (e.g. due to KeyboardInterrupt) */
2420 Py_DECREF(reader);
2421 return NULL;
2422 }
2423
2424 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2425 Py_DECREF(buffer);
2426 break;
2427 }
2428
2429 res = expat_parse(
2430 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2431 );
2432
2433 Py_DECREF(buffer);
2434
2435 if (!res) {
2436 Py_DECREF(reader);
2437 return NULL;
2438 }
2439 Py_DECREF(res);
2440
2441 }
2442
2443 Py_DECREF(reader);
2444
2445 res = expat_parse(self, "", 0, 1);
2446
2447 if (res && TreeBuilder_CheckExact(self->target)) {
2448 Py_DECREF(res);
2449 return treebuilder_done((TreeBuilderObject*) self->target);
2450 }
2451
2452 return res;
2453}
2454
2455static PyObject*
2456xmlparser_setevents(XMLParserObject* self, PyObject* args)
2457{
2458 /* activate element event reporting */
2459
Neal Norwitzc7074382006-06-12 02:06:17 +00002460 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002461 TreeBuilderObject* target;
2462
2463 PyObject* events; /* event collector */
2464 PyObject* event_set = Py_None;
2465 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2466 &event_set))
2467 return NULL;
2468
2469 if (!TreeBuilder_CheckExact(self->target)) {
2470 PyErr_SetString(
2471 PyExc_TypeError,
2472 "event handling only supported for cElementTree.Treebuilder "
2473 "targets"
2474 );
2475 return NULL;
2476 }
2477
2478 target = (TreeBuilderObject*) self->target;
2479
2480 Py_INCREF(events);
2481 Py_XDECREF(target->events);
2482 target->events = events;
2483
2484 /* clear out existing events */
2485 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2486 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2487 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2488 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2489
2490 if (event_set == Py_None) {
2491 /* default is "end" only */
2492 target->end_event_obj = PyString_FromString("end");
2493 Py_RETURN_NONE;
2494 }
2495
2496 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2497 goto error;
2498
2499 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2500 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2501 char* event;
2502 if (!PyString_Check(item))
2503 goto error;
2504 event = PyString_AS_STRING(item);
2505 if (strcmp(event, "start") == 0) {
2506 Py_INCREF(item);
2507 target->start_event_obj = item;
2508 } else if (strcmp(event, "end") == 0) {
2509 Py_INCREF(item);
2510 Py_XDECREF(target->end_event_obj);
2511 target->end_event_obj = item;
2512 } else if (strcmp(event, "start-ns") == 0) {
2513 Py_INCREF(item);
2514 Py_XDECREF(target->start_ns_event_obj);
2515 target->start_ns_event_obj = item;
2516 EXPAT(SetNamespaceDeclHandler)(
2517 self->parser,
2518 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2519 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2520 );
2521 } else if (strcmp(event, "end-ns") == 0) {
2522 Py_INCREF(item);
2523 Py_XDECREF(target->end_ns_event_obj);
2524 target->end_ns_event_obj = item;
2525 EXPAT(SetNamespaceDeclHandler)(
2526 self->parser,
2527 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2528 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2529 );
2530 } else {
2531 PyErr_Format(
2532 PyExc_ValueError,
2533 "unknown event '%s'", event
2534 );
2535 return NULL;
2536 }
2537 }
2538
2539 Py_RETURN_NONE;
2540
2541 error:
2542 PyErr_SetString(
2543 PyExc_TypeError,
2544 "invalid event tuple"
2545 );
2546 return NULL;
2547}
2548
2549static PyMethodDef xmlparser_methods[] = {
2550 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2551 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2552 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2553 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2554 {NULL, NULL}
2555};
2556
2557static PyObject*
2558xmlparser_getattr(XMLParserObject* self, char* name)
2559{
2560 PyObject* res;
2561
2562 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2563 if (res)
2564 return res;
2565
2566 PyErr_Clear();
2567
2568 if (strcmp(name, "entity") == 0)
2569 res = self->entity;
2570 else if (strcmp(name, "target") == 0)
2571 res = self->target;
2572 else if (strcmp(name, "version") == 0) {
2573 char buffer[100];
2574 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2575 XML_MINOR_VERSION, XML_MICRO_VERSION);
2576 return PyString_FromString(buffer);
2577 } else {
2578 PyErr_SetString(PyExc_AttributeError, name);
2579 return NULL;
2580 }
2581
2582 Py_INCREF(res);
2583 return res;
2584}
2585
2586statichere PyTypeObject XMLParser_Type = {
2587 PyObject_HEAD_INIT(NULL)
2588 0, "XMLParser", sizeof(XMLParserObject), 0,
2589 /* methods */
2590 (destructor)xmlparser_dealloc, /* tp_dealloc */
2591 0, /* tp_print */
2592 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2593};
2594
2595#endif
2596
2597/* ==================================================================== */
2598/* python module interface */
2599
2600static PyMethodDef _functions[] = {
2601 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2602 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2603 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2604#if defined(USE_EXPAT)
2605 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2606 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2607#endif
2608 {NULL, NULL}
2609};
2610
2611DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002612init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002613{
2614 PyObject* m;
2615 PyObject* g;
2616 char* bootstrap;
2617#if defined(USE_PYEXPAT_CAPI)
2618 struct PyExpat_CAPI* capi;
2619#endif
2620
2621 /* Patch object type */
2622 Element_Type.ob_type = TreeBuilder_Type.ob_type = &PyType_Type;
2623#if defined(USE_EXPAT)
2624 XMLParser_Type.ob_type = &PyType_Type;
2625#endif
2626
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002627 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002628 if (!m)
2629 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002630
2631 /* python glue code */
2632
2633 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002634 if (!g)
2635 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002636
2637 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2638
2639 bootstrap = (
2640
2641#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2642 "from __future__ import generators\n" /* enable yield under 2.2 */
2643#endif
2644
2645 "from copy import copy, deepcopy\n"
2646
2647 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002648 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002649 "except ImportError:\n"
2650 " import ElementTree\n"
2651 "ET = ElementTree\n"
2652 "del ElementTree\n"
2653
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002654 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002655
2656 "try:\n" /* check if copy works as is */
2657 " copy(cElementTree.Element('x'))\n"
2658 "except:\n"
2659 " def copyelement(elem):\n"
2660 " return elem\n"
2661
2662 "def Comment(text=None):\n" /* public */
2663 " element = cElementTree.Element(ET.Comment)\n"
2664 " element.text = text\n"
2665 " return element\n"
2666 "cElementTree.Comment = Comment\n"
2667
2668 "class ElementTree(ET.ElementTree):\n" /* public */
2669 " def parse(self, source, parser=None):\n"
2670 " if not hasattr(source, 'read'):\n"
2671 " source = open(source, 'rb')\n"
2672 " if parser is not None:\n"
2673 " while 1:\n"
2674 " data = source.read(65536)\n"
2675 " if not data:\n"
2676 " break\n"
2677 " parser.feed(data)\n"
2678 " self._root = parser.close()\n"
2679 " else:\n"
2680 " parser = cElementTree.XMLParser()\n"
2681 " self._root = parser._parse(source)\n"
2682 " return self._root\n"
2683 "cElementTree.ElementTree = ElementTree\n"
2684
2685 "def getiterator(node, tag=None):\n" /* helper */
2686 " if tag == '*':\n"
2687 " tag = None\n"
2688#if (PY_VERSION_HEX < 0x02020000)
2689 " nodes = []\n" /* 2.1 doesn't have yield */
2690 " if tag is None or node.tag == tag:\n"
2691 " nodes.append(node)\n"
2692 " for node in node:\n"
2693 " nodes.extend(getiterator(node, tag))\n"
2694 " return nodes\n"
2695#else
2696 " if tag is None or node.tag == tag:\n"
2697 " yield node\n"
2698 " for node in node:\n"
2699 " for node in getiterator(node, tag):\n"
2700 " yield node\n"
2701#endif
2702
2703 "def parse(source, parser=None):\n" /* public */
2704 " tree = ElementTree()\n"
2705 " tree.parse(source, parser)\n"
2706 " return tree\n"
2707 "cElementTree.parse = parse\n"
2708
2709#if (PY_VERSION_HEX < 0x02020000)
2710 "if hasattr(ET, 'iterparse'):\n"
2711 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2712#else
2713 "class iterparse(object):\n"
2714 " root = None\n"
2715 " def __init__(self, file, events=None):\n"
2716 " if not hasattr(file, 'read'):\n"
2717 " file = open(file, 'rb')\n"
2718 " self._file = file\n"
2719 " self._events = events\n"
2720 " def __iter__(self):\n"
2721 " events = []\n"
2722 " b = cElementTree.TreeBuilder()\n"
2723 " p = cElementTree.XMLParser(b)\n"
2724 " p._setevents(events, self._events)\n"
2725 " while 1:\n"
2726 " data = self._file.read(16384)\n"
2727 " if not data:\n"
2728 " break\n"
2729 " p.feed(data)\n"
2730 " for event in events:\n"
2731 " yield event\n"
2732 " del events[:]\n"
2733 " root = p.close()\n"
2734 " for event in events:\n"
2735 " yield event\n"
2736 " self.root = root\n"
2737 "cElementTree.iterparse = iterparse\n"
2738#endif
2739
2740 "def PI(target, text=None):\n" /* public */
2741 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2742 " element.text = target\n"
2743 " if text:\n"
2744 " element.text = element.text + ' ' + text\n"
2745 " return element\n"
2746
2747 " elem = cElementTree.Element(ET.PI)\n"
2748 " elem.text = text\n"
2749 " return elem\n"
2750 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2751
2752 "def XML(text):\n" /* public */
2753 " parser = cElementTree.XMLParser()\n"
2754 " parser.feed(text)\n"
2755 " return parser.close()\n"
2756 "cElementTree.XML = cElementTree.fromstring = XML\n"
2757
2758 "def XMLID(text):\n" /* public */
2759 " tree = XML(text)\n"
2760 " ids = {}\n"
2761 " for elem in tree.getiterator():\n"
2762 " id = elem.get('id')\n"
2763 " if id:\n"
2764 " ids[id] = elem\n"
2765 " return tree, ids\n"
2766 "cElementTree.XMLID = XMLID\n"
2767
2768 "cElementTree.dump = ET.dump\n"
2769 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2770 "cElementTree.iselement = ET.iselement\n"
2771 "cElementTree.QName = ET.QName\n"
2772 "cElementTree.tostring = ET.tostring\n"
2773 "cElementTree.VERSION = '" VERSION "'\n"
2774 "cElementTree.__version__ = '" VERSION "'\n"
2775 "cElementTree.XMLParserError = SyntaxError\n"
2776
2777 );
2778
2779 PyRun_String(bootstrap, Py_file_input, g, NULL);
2780
2781 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2782
2783 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2784 if (elementtree_copyelement_obj) {
2785 /* reduce hack needed; enable reduce method */
2786 PyMethodDef* mp;
2787 for (mp = element_methods; mp->ml_name; mp++)
2788 if (mp->ml_meth == (PyCFunction) element_reduce) {
2789 mp->ml_name = "__reduce__";
2790 break;
2791 }
2792 } else
2793 PyErr_Clear();
2794 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2795 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2796
2797#if defined(USE_PYEXPAT_CAPI)
2798 /* link against pyexpat, if possible */
2799 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2800 if (capi &&
2801 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2802 capi->size <= sizeof(*expat_capi) &&
2803 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2804 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2805 capi->MICRO_VERSION == XML_MICRO_VERSION)
2806 expat_capi = capi;
2807 else
2808 expat_capi = NULL;
2809#endif
2810
2811}