blob: b2bbbcf5a49a54bcbeed914583b45adbeaf901a2 [file] [log] [blame]
/*
 * ElementTree
 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
 *
 * elementtree accelerator
 *
 * History:
 * 1999-06-20 fl  created (as part of sgmlop)
 * 2001-05-29 fl  effdom edition
 * 2001-06-05 fl  backported to unix; fixed bogus free in clear
 * 2001-07-10 fl  added findall helper
 * 2003-02-27 fl  elementtree edition (alpha)
 * 2004-06-03 fl  updates for elementtree 1.2
 * 2005-01-05 fl  added universal name cache, Element/SubElement factories
 * 2005-01-06 fl  moved python helpers into C module; removed 1.5.2 support
 * 2005-01-07 fl  added 2.1 support; work around broken __copy__ in 2.3
 * 2005-01-08 fl  added makeelement method; fixed path support
 * 2005-01-10 fl  optimized memory usage
 * 2005-01-11 fl  first public release (cElementTree 0.8)
 * 2005-01-12 fl  split element object into base and extras
 * 2005-01-13 fl  use tagged pointers for tail/text (cElementTree 0.9)
 * 2005-01-17 fl  added treebuilder close method
 * 2005-01-17 fl  fixed crash in getchildren
 * 2005-01-18 fl  removed observer api, added iterparse (cElementTree 0.9.3)
 * 2005-01-23 fl  revised iterparse api; added namespace event support (0.9.8)
 * 2005-01-26 fl  added VERSION module property (cElementTree 1.0)
 * 2005-01-28 fl  added remove method (1.0.1)
 * 2005-03-01 fl  added iselement function; fixed makeelement aliasing (1.0.2)
 * 2005-03-13 fl  export Comment and ProcessingInstruction/PI helpers
 * 2005-03-26 fl  added Comment and PI support to XMLParser
 * 2005-03-27 fl  event optimizations; complain about bogus events
 * 2005-08-08 fl  fixed read error handling in parse
 * 2005-08-11 fl  added runtime test for copy workaround (1.0.3)
 * 2005-12-13 fl  added expat_capi support (for xml.etree) (1.0.4)
 * 2005-12-16 fl  added support for non-standard encodings
 * 2006-03-08 fl  fixed a couple of potential null-refs and leaks
 * 2006-03-12 fl  merge in 2.5 ssize_t changes
 *
 * Copyright (c) 1999-2006 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2006 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 */

/* Licensed to PSF under a Contributor Agreement. */
/* See http://www.python.org/2.4/license for licensing details. */
48
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000049#include "Python.h"
50
/* Version string of this accelerator (see the VERSION module property
   mentioned in the history above). */
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Leave defined to include the expat-based XMLParser type */
#define USE_EXPAT

/* Define to route all expat calls via pyexpat's embedded expat library */
/* #define USE_PYEXPAT_CAPI */

/* An element can hold this many children without extra memory
   allocations. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current version of cElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
75
76/* -------------------------------------------------------------------- */
77
#if 0
/* debugging aid (disabled): keep a running byte count in `memory` and
   print it, together with the comment, on every ALLOC/RELEASE */
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
/* normal build: the accounting hooks expand to nothing */
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) declares a file-local helper; under
   MSVC it additionally requests inlining and the fastcall convention */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif

/* macros used to store 'join' flags in string object pointers.  note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ.  see comments in the ElementObject definition for more
   info. */
/* JOIN_GET: extract the flag kept in the lowest bit of the pointer */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
/* JOIN_SET: strip any existing flag from p, then OR in the new flag */
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
/* JOIN_OBJ: clear the lowest bit to recover the real object pointer */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))

/* glue functions (see the init function for details) */
/* each helper is tested for NULL before use; the code then raises
   RuntimeError ("... helper not found") instead of calling it */
static PyObject* elementtree_copyelement_obj;
static PyObject* elementtree_deepcopy_obj;
static PyObject* elementtree_getiterator_obj;
static PyObject* elementpath_obj;
109
110/* helpers */
111
112LOCAL(PyObject*)
113deepcopy(PyObject* object, PyObject* memo)
114{
115 /* do a deep copy of the given object */
116
117 PyObject* args;
118 PyObject* result;
119
120 if (!elementtree_deepcopy_obj) {
121 PyErr_SetString(
122 PyExc_RuntimeError,
123 "deepcopy helper not found"
124 );
125 return NULL;
126 }
127
128 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000129 if (!args)
130 return NULL;
131
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
133 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
134
135 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
136
137 Py_DECREF(args);
138
139 return result;
140}
141
142LOCAL(PyObject*)
143list_join(PyObject* list)
144{
145 /* join list elements (destroying the list in the process) */
146
147 PyObject* joiner;
148 PyObject* function;
149 PyObject* args;
150 PyObject* result;
151
152 switch (PyList_GET_SIZE(list)) {
153 case 0:
154 Py_DECREF(list);
155 return PyString_FromString("");
156 case 1:
157 result = PyList_GET_ITEM(list, 0);
158 Py_INCREF(result);
159 Py_DECREF(list);
160 return result;
161 }
162
163 /* two or more elements: slice out a suitable separator from the
164 first member, and use that to join the entire list */
165
166 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
167 if (!joiner)
168 return NULL;
169
170 function = PyObject_GetAttrString(joiner, "join");
171 if (!function) {
172 Py_DECREF(joiner);
173 return NULL;
174 }
175
176 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000177 if (!args)
178 return NULL;
179
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000180 PyTuple_SET_ITEM(args, 0, list);
181
182 result = PyObject_CallObject(function, args);
183
184 Py_DECREF(args); /* also removes list */
185 Py_DECREF(function);
186 Py_DECREF(joiner);
187
188 return result;
189}
190
191#if (PY_VERSION_HEX < 0x02020000)
192LOCAL(int)
193PyDict_Update(PyObject* dict, PyObject* other)
194{
195 /* PyDict_Update emulation for 2.1 and earlier */
196
197 PyObject* res;
198
199 res = PyObject_CallMethod(dict, "update", "O", other);
200 if (!res)
201 return -1;
202
203 Py_DECREF(res);
204 return 0;
205}
206#endif
207
208/* -------------------------------------------------------------------- */
209/* the element type */
210
/* Optional part of an element: attributes and children.  It is created
   by element_new_extra, which element_new calls only for a non-empty
   attrib and which other functions call on demand when the section is
   missing. */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    int length; /* actual number of items */
    int allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline storage used until more than STATIC_CHILDREN slots are
       needed (see element_resize) */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
226
/* The element object proper.  Attributes and children live in the
   separately allocated `extra` section. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children, or NULL if the element has neither */
    ElementObjectExtra* extra;

} ElementObject;

/* forward declaration of the element type object */
static PyTypeObject Element_Type;

/* true if op's type is exactly Element_Type (a pointer comparison, so
   subclasses do not match) */
#define Element_CheckExact(op) ((op)->ob_type == &Element_Type)
252
253/* -------------------------------------------------------------------- */
254/* element constructor and destructor */
255
256LOCAL(int)
257element_new_extra(ElementObject* self, PyObject* attrib)
258{
259 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
260 if (!self->extra)
261 return -1;
262
263 if (!attrib)
264 attrib = Py_None;
265
266 Py_INCREF(attrib);
267 self->extra->attrib = attrib;
268
269 self->extra->length = 0;
270 self->extra->allocated = STATIC_CHILDREN;
271 self->extra->children = self->extra->_children;
272
273 return 0;
274}
275
276LOCAL(void)
277element_dealloc_extra(ElementObject* self)
278{
279 int i;
280
281 Py_DECREF(self->extra->attrib);
282
283 for (i = 0; i < self->extra->length; i++)
284 Py_DECREF(self->extra->children[i]);
285
286 if (self->extra->children != self->extra->_children)
287 PyObject_Free(self->extra->children);
288
289 PyObject_Free(self->extra);
290}
291
292LOCAL(PyObject*)
293element_new(PyObject* tag, PyObject* attrib)
294{
295 ElementObject* self;
296
297 self = PyObject_New(ElementObject, &Element_Type);
298 if (self == NULL)
299 return NULL;
300
301 /* use None for empty dictionaries */
302 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
303 attrib = Py_None;
304
305 self->extra = NULL;
306
307 if (attrib != Py_None) {
308
Thomas Wouters477c8d52006-05-27 19:21:47 +0000309 if (element_new_extra(self, attrib) < 0) {
310 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000312 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000313
314 self->extra->length = 0;
315 self->extra->allocated = STATIC_CHILDREN;
316 self->extra->children = self->extra->_children;
317
318 }
319
320 Py_INCREF(tag);
321 self->tag = tag;
322
323 Py_INCREF(Py_None);
324 self->text = Py_None;
325
326 Py_INCREF(Py_None);
327 self->tail = Py_None;
328
329 ALLOC(sizeof(ElementObject), "create element");
330
331 return (PyObject*) self;
332}
333
334LOCAL(int)
335element_resize(ElementObject* self, int extra)
336{
337 int size;
338 PyObject* *children;
339
340 /* make sure self->children can hold the given number of extra
341 elements. set an exception and return -1 if allocation failed */
342
343 if (!self->extra)
344 element_new_extra(self, NULL);
345
346 size = self->extra->length + extra;
347
348 if (size > self->extra->allocated) {
349 /* use Python 2.4's list growth strategy */
350 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
351 if (self->extra->children != self->extra->_children) {
352 children = PyObject_Realloc(self->extra->children,
353 size * sizeof(PyObject*));
354 if (!children)
355 goto nomemory;
356 } else {
357 children = PyObject_Malloc(size * sizeof(PyObject*));
358 if (!children)
359 goto nomemory;
360 /* copy existing children from static area to malloc buffer */
361 memcpy(children, self->extra->children,
362 self->extra->length * sizeof(PyObject*));
363 }
364 self->extra->children = children;
365 self->extra->allocated = size;
366 }
367
368 return 0;
369
370 nomemory:
371 PyErr_NoMemory();
372 return -1;
373}
374
375LOCAL(int)
376element_add_subelement(ElementObject* self, PyObject* element)
377{
378 /* add a child element to a parent */
379
380 if (element_resize(self, 1) < 0)
381 return -1;
382
383 Py_INCREF(element);
384 self->extra->children[self->extra->length] = element;
385
386 self->extra->length++;
387
388 return 0;
389}
390
391LOCAL(PyObject*)
392element_get_attrib(ElementObject* self)
393{
394 /* return borrowed reference to attrib dictionary */
395 /* note: this function assumes that the extra section exists */
396
397 PyObject* res = self->extra->attrib;
398
399 if (res == Py_None) {
400 /* create missing dictionary */
401 res = PyDict_New();
402 if (!res)
403 return NULL;
404 self->extra->attrib = res;
405 }
406
407 return res;
408}
409
410LOCAL(PyObject*)
411element_get_text(ElementObject* self)
412{
413 /* return borrowed reference to text attribute */
414
415 PyObject* res = self->text;
416
417 if (JOIN_GET(res)) {
418 res = JOIN_OBJ(res);
419 if (PyList_CheckExact(res)) {
420 res = list_join(res);
421 if (!res)
422 return NULL;
423 self->text = res;
424 }
425 }
426
427 return res;
428}
429
430LOCAL(PyObject*)
431element_get_tail(ElementObject* self)
432{
433 /* return borrowed reference to text attribute */
434
435 PyObject* res = self->tail;
436
437 if (JOIN_GET(res)) {
438 res = JOIN_OBJ(res);
439 if (PyList_CheckExact(res)) {
440 res = list_join(res);
441 if (!res)
442 return NULL;
443 self->tail = res;
444 }
445 }
446
447 return res;
448}
449
450static PyObject*
451element(PyObject* self, PyObject* args, PyObject* kw)
452{
453 PyObject* elem;
454
455 PyObject* tag;
456 PyObject* attrib = NULL;
457 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
458 &PyDict_Type, &attrib))
459 return NULL;
460
461 if (attrib || kw) {
462 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
463 if (!attrib)
464 return NULL;
465 if (kw)
466 PyDict_Update(attrib, kw);
467 } else {
468 Py_INCREF(Py_None);
469 attrib = Py_None;
470 }
471
472 elem = element_new(tag, attrib);
473
474 Py_DECREF(attrib);
475
476 return elem;
477}
478
479static PyObject*
480subelement(PyObject* self, PyObject* args, PyObject* kw)
481{
482 PyObject* elem;
483
484 ElementObject* parent;
485 PyObject* tag;
486 PyObject* attrib = NULL;
487 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
488 &Element_Type, &parent, &tag,
489 &PyDict_Type, &attrib))
490 return NULL;
491
492 if (attrib || kw) {
493 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
494 if (!attrib)
495 return NULL;
496 if (kw)
497 PyDict_Update(attrib, kw);
498 } else {
499 Py_INCREF(Py_None);
500 attrib = Py_None;
501 }
502
503 elem = element_new(tag, attrib);
504
505 Py_DECREF(attrib);
506
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000507 if (element_add_subelement(parent, elem) < 0) {
508 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000509 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000510 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000511
512 return elem;
513}
514
515static void
516element_dealloc(ElementObject* self)
517{
518 if (self->extra)
519 element_dealloc_extra(self);
520
521 /* discard attributes */
522 Py_DECREF(self->tag);
523 Py_DECREF(JOIN_OBJ(self->text));
524 Py_DECREF(JOIN_OBJ(self->tail));
525
526 RELEASE(sizeof(ElementObject), "destroy element");
527
528 PyObject_Del(self);
529}
530
531/* -------------------------------------------------------------------- */
532/* methods (in alphabetical order) */
533
534static PyObject*
535element_append(ElementObject* self, PyObject* args)
536{
537 PyObject* element;
538 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
539 return NULL;
540
541 if (element_add_subelement(self, element) < 0)
542 return NULL;
543
544 Py_RETURN_NONE;
545}
546
547static PyObject*
548element_clear(ElementObject* self, PyObject* args)
549{
550 if (!PyArg_ParseTuple(args, ":clear"))
551 return NULL;
552
553 if (self->extra) {
554 element_dealloc_extra(self);
555 self->extra = NULL;
556 }
557
558 Py_INCREF(Py_None);
559 Py_DECREF(JOIN_OBJ(self->text));
560 self->text = Py_None;
561
562 Py_INCREF(Py_None);
563 Py_DECREF(JOIN_OBJ(self->tail));
564 self->tail = Py_None;
565
566 Py_RETURN_NONE;
567}
568
569static PyObject*
570element_copy(ElementObject* self, PyObject* args)
571{
572 int i;
573 ElementObject* element;
574
575 if (!PyArg_ParseTuple(args, ":__copy__"))
576 return NULL;
577
578 element = (ElementObject*) element_new(
579 self->tag, (self->extra) ? self->extra->attrib : Py_None
580 );
581 if (!element)
582 return NULL;
583
584 Py_DECREF(JOIN_OBJ(element->text));
585 element->text = self->text;
586 Py_INCREF(JOIN_OBJ(element->text));
587
588 Py_DECREF(JOIN_OBJ(element->tail));
589 element->tail = self->tail;
590 Py_INCREF(JOIN_OBJ(element->tail));
591
592 if (self->extra) {
593
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000594 if (element_resize(element, self->extra->length) < 0) {
595 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000596 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000597 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000598
599 for (i = 0; i < self->extra->length; i++) {
600 Py_INCREF(self->extra->children[i]);
601 element->extra->children[i] = self->extra->children[i];
602 }
603
604 element->extra->length = self->extra->length;
605
606 }
607
608 return (PyObject*) element;
609}
610
611static PyObject*
612element_deepcopy(ElementObject* self, PyObject* args)
613{
614 int i;
615 ElementObject* element;
616 PyObject* tag;
617 PyObject* attrib;
618 PyObject* text;
619 PyObject* tail;
620 PyObject* id;
621
622 PyObject* memo;
623 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
624 return NULL;
625
626 tag = deepcopy(self->tag, memo);
627 if (!tag)
628 return NULL;
629
630 if (self->extra) {
631 attrib = deepcopy(self->extra->attrib, memo);
632 if (!attrib) {
633 Py_DECREF(tag);
634 return NULL;
635 }
636 } else {
637 Py_INCREF(Py_None);
638 attrib = Py_None;
639 }
640
641 element = (ElementObject*) element_new(tag, attrib);
642
643 Py_DECREF(tag);
644 Py_DECREF(attrib);
645
646 if (!element)
647 return NULL;
648
649 text = deepcopy(JOIN_OBJ(self->text), memo);
650 if (!text)
651 goto error;
652 Py_DECREF(element->text);
653 element->text = JOIN_SET(text, JOIN_GET(self->text));
654
655 tail = deepcopy(JOIN_OBJ(self->tail), memo);
656 if (!tail)
657 goto error;
658 Py_DECREF(element->tail);
659 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
660
661 if (self->extra) {
662
663 if (element_resize(element, self->extra->length) < 0)
664 goto error;
665
666 for (i = 0; i < self->extra->length; i++) {
667 PyObject* child = deepcopy(self->extra->children[i], memo);
668 if (!child) {
669 element->extra->length = i;
670 goto error;
671 }
672 element->extra->children[i] = child;
673 }
674
675 element->extra->length = self->extra->length;
676
677 }
678
679 /* add object to memo dictionary (so deepcopy won't visit it again) */
680 id = PyInt_FromLong((Py_uintptr_t) self);
681
682 i = PyDict_SetItem(memo, id, (PyObject*) element);
683
684 Py_DECREF(id);
685
686 if (i < 0)
687 goto error;
688
689 return (PyObject*) element;
690
691 error:
692 Py_DECREF(element);
693 return NULL;
694}
695
696LOCAL(int)
697checkpath(PyObject* tag)
698{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000699 Py_ssize_t i;
700 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701
702 /* check if a tag contains an xpath character */
703
704#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
705
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000706 if (PyUnicode_Check(tag)) {
707 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
708 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
709 if (p[i] == '{')
710 check = 0;
711 else if (p[i] == '}')
712 check = 1;
713 else if (check && PATHCHAR(p[i]))
714 return 1;
715 }
716 return 0;
717 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000718 if (PyString_Check(tag)) {
719 char *p = PyString_AS_STRING(tag);
720 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
721 if (p[i] == '{')
722 check = 0;
723 else if (p[i] == '}')
724 check = 1;
725 else if (check && PATHCHAR(p[i]))
726 return 1;
727 }
728 return 0;
729 }
730
731 return 1; /* unknown type; might be path expression */
732}
733
734static PyObject*
735element_find(ElementObject* self, PyObject* args)
736{
737 int i;
738
739 PyObject* tag;
740 if (!PyArg_ParseTuple(args, "O:find", &tag))
741 return NULL;
742
743 if (checkpath(tag))
744 return PyObject_CallMethod(
745 elementpath_obj, "find", "OO", self, tag
746 );
747
748 if (!self->extra)
749 Py_RETURN_NONE;
750
751 for (i = 0; i < self->extra->length; i++) {
752 PyObject* item = self->extra->children[i];
753 if (Element_CheckExact(item) &&
754 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
755 Py_INCREF(item);
756 return item;
757 }
758 }
759
760 Py_RETURN_NONE;
761}
762
763static PyObject*
764element_findtext(ElementObject* self, PyObject* args)
765{
766 int i;
767
768 PyObject* tag;
769 PyObject* default_value = Py_None;
770 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
771 return NULL;
772
773 if (checkpath(tag))
774 return PyObject_CallMethod(
775 elementpath_obj, "findtext", "OOO", self, tag, default_value
776 );
777
778 if (!self->extra) {
779 Py_INCREF(default_value);
780 return default_value;
781 }
782
783 for (i = 0; i < self->extra->length; i++) {
784 ElementObject* item = (ElementObject*) self->extra->children[i];
785 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
786 PyObject* text = element_get_text(item);
787 if (text == Py_None)
788 return PyString_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000789 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000790 return text;
791 }
792 }
793
794 Py_INCREF(default_value);
795 return default_value;
796}
797
798static PyObject*
799element_findall(ElementObject* self, PyObject* args)
800{
801 int i;
802 PyObject* out;
803
804 PyObject* tag;
805 if (!PyArg_ParseTuple(args, "O:findall", &tag))
806 return NULL;
807
808 if (checkpath(tag))
809 return PyObject_CallMethod(
810 elementpath_obj, "findall", "OO", self, tag
811 );
812
813 out = PyList_New(0);
814 if (!out)
815 return NULL;
816
817 if (!self->extra)
818 return out;
819
820 for (i = 0; i < self->extra->length; i++) {
821 PyObject* item = self->extra->children[i];
822 if (Element_CheckExact(item) &&
823 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
824 if (PyList_Append(out, item) < 0) {
825 Py_DECREF(out);
826 return NULL;
827 }
828 }
829 }
830
831 return out;
832}
833
834static PyObject*
835element_get(ElementObject* self, PyObject* args)
836{
837 PyObject* value;
838
839 PyObject* key;
840 PyObject* default_value = Py_None;
841 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
842 return NULL;
843
844 if (!self->extra || self->extra->attrib == Py_None)
845 value = default_value;
846 else {
847 value = PyDict_GetItem(self->extra->attrib, key);
848 if (!value)
849 value = default_value;
850 }
851
852 Py_INCREF(value);
853 return value;
854}
855
856static PyObject*
857element_getchildren(ElementObject* self, PyObject* args)
858{
859 int i;
860 PyObject* list;
861
862 if (!PyArg_ParseTuple(args, ":getchildren"))
863 return NULL;
864
865 if (!self->extra)
866 return PyList_New(0);
867
868 list = PyList_New(self->extra->length);
869 if (!list)
870 return NULL;
871
872 for (i = 0; i < self->extra->length; i++) {
873 PyObject* item = self->extra->children[i];
874 Py_INCREF(item);
875 PyList_SET_ITEM(list, i, item);
876 }
877
878 return list;
879}
880
881static PyObject*
882element_getiterator(ElementObject* self, PyObject* args)
883{
884 PyObject* result;
885
886 PyObject* tag = Py_None;
887 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
888 return NULL;
889
890 if (!elementtree_getiterator_obj) {
891 PyErr_SetString(
892 PyExc_RuntimeError,
893 "getiterator helper not found"
894 );
895 return NULL;
896 }
897
898 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000899 if (!args)
900 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000901
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000902 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
903 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
904
905 result = PyObject_CallObject(elementtree_getiterator_obj, args);
906
907 Py_DECREF(args);
908
909 return result;
910}
911
912static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000913element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000914{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000915 ElementObject* self = (ElementObject*) self_;
916
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000917 if (!self->extra || index < 0 || index >= self->extra->length) {
918 PyErr_SetString(
919 PyExc_IndexError,
920 "child index out of range"
921 );
922 return NULL;
923 }
924
925 Py_INCREF(self->extra->children[index]);
926 return self->extra->children[index];
927}
928
929static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000930element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000931{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000932 ElementObject* self = (ElementObject*) self_;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000933 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000934 PyObject* list;
935
936 if (!self->extra)
937 return PyList_New(0);
938
939 /* standard clamping */
940 if (start < 0)
941 start = 0;
942 if (end < 0)
943 end = 0;
944 if (end > self->extra->length)
945 end = self->extra->length;
946 if (start > end)
947 start = end;
948
949 list = PyList_New(end - start);
950 if (!list)
951 return NULL;
952
953 for (i = start; i < end; i++) {
954 PyObject* item = self->extra->children[i];
955 Py_INCREF(item);
956 PyList_SET_ITEM(list, i - start, item);
957 }
958
959 return list;
960}
961
962static PyObject*
963element_insert(ElementObject* self, PyObject* args)
964{
965 int i;
966
967 int index;
968 PyObject* element;
969 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
970 &Element_Type, &element))
971 return NULL;
972
973 if (!self->extra)
974 element_new_extra(self, NULL);
975
976 if (index < 0)
977 index = 0;
978 if (index > self->extra->length)
979 index = self->extra->length;
980
981 if (element_resize(self, 1) < 0)
982 return NULL;
983
984 for (i = self->extra->length; i > index; i--)
985 self->extra->children[i] = self->extra->children[i-1];
986
987 Py_INCREF(element);
988 self->extra->children[index] = element;
989
990 self->extra->length++;
991
992 Py_RETURN_NONE;
993}
994
995static PyObject*
996element_items(ElementObject* self, PyObject* args)
997{
998 if (!PyArg_ParseTuple(args, ":items"))
999 return NULL;
1000
1001 if (!self->extra || self->extra->attrib == Py_None)
1002 return PyList_New(0);
1003
1004 return PyDict_Items(self->extra->attrib);
1005}
1006
1007static PyObject*
1008element_keys(ElementObject* self, PyObject* args)
1009{
1010 if (!PyArg_ParseTuple(args, ":keys"))
1011 return NULL;
1012
1013 if (!self->extra || self->extra->attrib == Py_None)
1014 return PyList_New(0);
1015
1016 return PyDict_Keys(self->extra->attrib);
1017}
1018
Martin v. Löwis18e16552006-02-15 17:27:45 +00001019static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001020element_length(ElementObject* self)
1021{
1022 if (!self->extra)
1023 return 0;
1024
1025 return self->extra->length;
1026}
1027
1028static PyObject*
1029element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1030{
1031 PyObject* elem;
1032
1033 PyObject* tag;
1034 PyObject* attrib;
1035 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1036 return NULL;
1037
1038 attrib = PyDict_Copy(attrib);
1039 if (!attrib)
1040 return NULL;
1041
1042 elem = element_new(tag, attrib);
1043
1044 Py_DECREF(attrib);
1045
1046 return elem;
1047}
1048
1049static PyObject*
1050element_reduce(ElementObject* self, PyObject* args)
1051{
1052 if (!PyArg_ParseTuple(args, ":__reduce__"))
1053 return NULL;
1054
1055 /* Hack alert: This method is used to work around a __copy__
1056 problem on certain 2.3 and 2.4 versions. To save time and
1057 simplify the code, we create the copy in here, and use a dummy
1058 copyelement helper to trick the copy module into doing the
1059 right thing. */
1060
1061 if (!elementtree_copyelement_obj) {
1062 PyErr_SetString(
1063 PyExc_RuntimeError,
1064 "copyelement helper not found"
1065 );
1066 return NULL;
1067 }
1068
1069 return Py_BuildValue(
1070 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1071 );
1072}
1073
1074static PyObject*
1075element_remove(ElementObject* self, PyObject* args)
1076{
1077 int i;
1078
1079 PyObject* element;
1080 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1081 return NULL;
1082
1083 if (!self->extra) {
1084 /* element has no children, so raise exception */
1085 PyErr_SetString(
1086 PyExc_ValueError,
1087 "list.remove(x): x not in list"
1088 );
1089 return NULL;
1090 }
1091
1092 for (i = 0; i < self->extra->length; i++) {
1093 if (self->extra->children[i] == element)
1094 break;
1095 if (PyObject_Compare(self->extra->children[i], element) == 0)
1096 break;
1097 }
1098
1099 if (i == self->extra->length) {
1100 /* element is not in children, so raise exception */
1101 PyErr_SetString(
1102 PyExc_ValueError,
1103 "list.remove(x): x not in list"
1104 );
1105 return NULL;
1106 }
1107
1108 Py_DECREF(self->extra->children[i]);
1109
1110 self->extra->length--;
1111
1112 for (; i < self->extra->length; i++)
1113 self->extra->children[i] = self->extra->children[i+1];
1114
1115 Py_RETURN_NONE;
1116}
1117
1118static PyObject*
1119element_repr(ElementObject* self)
1120{
1121 PyObject* repr;
1122 char buffer[100];
1123
1124 repr = PyString_FromString("<Element ");
1125
1126 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag));
1127
1128 sprintf(buffer, " at %p>", self);
1129 PyString_ConcatAndDel(&repr, PyString_FromString(buffer));
1130
1131 return repr;
1132}
1133
1134static PyObject*
1135element_set(ElementObject* self, PyObject* args)
1136{
1137 PyObject* attrib;
1138
1139 PyObject* key;
1140 PyObject* value;
1141 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1142 return NULL;
1143
1144 if (!self->extra)
1145 element_new_extra(self, NULL);
1146
1147 attrib = element_get_attrib(self);
1148 if (!attrib)
1149 return NULL;
1150
1151 if (PyDict_SetItem(attrib, key, value) < 0)
1152 return NULL;
1153
1154 Py_RETURN_NONE;
1155}
1156
1157static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001158element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001159{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001160 ElementObject* self = (ElementObject*) self_;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001161 Py_ssize_t i, new, old;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001162 PyObject* recycle = NULL;
1163
1164 if (!self->extra)
1165 element_new_extra(self, NULL);
1166
1167 /* standard clamping */
1168 if (start < 0)
1169 start = 0;
1170 if (end < 0)
1171 end = 0;
1172 if (end > self->extra->length)
1173 end = self->extra->length;
1174 if (start > end)
1175 start = end;
1176
1177 old = end - start;
1178
1179 if (item == NULL)
1180 new = 0;
1181 else if (PyList_CheckExact(item)) {
1182 new = PyList_GET_SIZE(item);
1183 } else {
1184 /* FIXME: support arbitrary sequences? */
1185 PyErr_Format(
1186 PyExc_TypeError,
1187 "expected list, not \"%.200s\"", item->ob_type->tp_name
1188 );
1189 return -1;
1190 }
1191
1192 if (old > 0) {
1193 /* to avoid recursive calls to this method (via decref), move
1194 old items to the recycle bin here, and get rid of them when
1195 we're done modifying the element */
1196 recycle = PyList_New(old);
1197 for (i = 0; i < old; i++)
1198 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1199 }
1200
1201 if (new < old) {
1202 /* delete slice */
1203 for (i = end; i < self->extra->length; i++)
1204 self->extra->children[i + new - old] = self->extra->children[i];
1205 } else if (new > old) {
1206 /* insert slice */
1207 if (element_resize(self, new - old) < 0)
1208 return -1;
1209 for (i = self->extra->length-1; i >= end; i--)
1210 self->extra->children[i + new - old] = self->extra->children[i];
1211 }
1212
1213 /* replace the slice */
1214 for (i = 0; i < new; i++) {
1215 PyObject* element = PyList_GET_ITEM(item, i);
1216 Py_INCREF(element);
1217 self->extra->children[i + start] = element;
1218 }
1219
1220 self->extra->length += new - old;
1221
1222 /* discard the recycle bin, and everything in it */
1223 Py_XDECREF(recycle);
1224
1225 return 0;
1226}
1227
1228static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001229element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001230{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001231 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001232 int i;
1233 PyObject* old;
1234
1235 if (!self->extra || index < 0 || index >= self->extra->length) {
1236 PyErr_SetString(
1237 PyExc_IndexError,
1238 "child assignment index out of range");
1239 return -1;
1240 }
1241
1242 old = self->extra->children[index];
1243
1244 if (item) {
1245 Py_INCREF(item);
1246 self->extra->children[index] = item;
1247 } else {
1248 self->extra->length--;
1249 for (i = index; i < self->extra->length; i++)
1250 self->extra->children[i] = self->extra->children[i+1];
1251 }
1252
1253 Py_DECREF(old);
1254
1255 return 0;
1256}
1257
1258static PyMethodDef element_methods[] = {
1259
1260 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1261
1262 {"get", (PyCFunction) element_get, METH_VARARGS},
1263 {"set", (PyCFunction) element_set, METH_VARARGS},
1264
1265 {"find", (PyCFunction) element_find, METH_VARARGS},
1266 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1267 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1268
1269 {"append", (PyCFunction) element_append, METH_VARARGS},
1270 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1271 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1272
1273 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1274 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1275
1276 {"items", (PyCFunction) element_items, METH_VARARGS},
1277 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1278
1279 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1280
1281 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1282 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1283
1284 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1285 C objects correctly, so we have to fake it using a __reduce__-
1286 based hack (see the element_reduce implementation above for
1287 details). */
1288
1289 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1290 using a runtime test to figure out if we need to fake things
1291 or now (see the init code below). The following entry is
1292 enabled only if the hack is needed. */
1293
1294 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1295
1296 {NULL, NULL}
1297};
1298
1299static PyObject*
1300element_getattr(ElementObject* self, char* name)
1301{
1302 PyObject* res;
1303
1304 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1305 if (res)
1306 return res;
1307
1308 PyErr_Clear();
1309
1310 if (strcmp(name, "tag") == 0)
1311 res = self->tag;
1312 else if (strcmp(name, "text") == 0)
1313 res = element_get_text(self);
1314 else if (strcmp(name, "tail") == 0) {
1315 res = element_get_tail(self);
1316 } else if (strcmp(name, "attrib") == 0) {
1317 if (!self->extra)
1318 element_new_extra(self, NULL);
1319 res = element_get_attrib(self);
1320 } else {
1321 PyErr_SetString(PyExc_AttributeError, name);
1322 return NULL;
1323 }
1324
1325 if (!res)
1326 return NULL;
1327
1328 Py_INCREF(res);
1329 return res;
1330}
1331
1332static int
1333element_setattr(ElementObject* self, const char* name, PyObject* value)
1334{
1335 if (value == NULL) {
1336 PyErr_SetString(
1337 PyExc_AttributeError,
1338 "can't delete element attributes"
1339 );
1340 return -1;
1341 }
1342
1343 if (strcmp(name, "tag") == 0) {
1344 Py_DECREF(self->tag);
1345 self->tag = value;
1346 Py_INCREF(self->tag);
1347 } else if (strcmp(name, "text") == 0) {
1348 Py_DECREF(JOIN_OBJ(self->text));
1349 self->text = value;
1350 Py_INCREF(self->text);
1351 } else if (strcmp(name, "tail") == 0) {
1352 Py_DECREF(JOIN_OBJ(self->tail));
1353 self->tail = value;
1354 Py_INCREF(self->tail);
1355 } else if (strcmp(name, "attrib") == 0) {
1356 if (!self->extra)
1357 element_new_extra(self, NULL);
1358 Py_DECREF(self->extra->attrib);
1359 self->extra->attrib = value;
1360 Py_INCREF(self->extra->attrib);
1361 } else {
1362 PyErr_SetString(PyExc_AttributeError, name);
1363 return -1;
1364 }
1365
1366 return 0;
1367}
1368
1369static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001370 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001371 0, /* sq_concat */
1372 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001373 element_getitem,
1374 element_getslice,
1375 element_setitem,
1376 element_setslice,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001377};
1378
Neal Norwitz227b5332006-03-22 09:28:35 +00001379static PyTypeObject Element_Type = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001380 PyObject_HEAD_INIT(NULL)
1381 0, "Element", sizeof(ElementObject), 0,
1382 /* methods */
1383 (destructor)element_dealloc, /* tp_dealloc */
1384 0, /* tp_print */
1385 (getattrfunc)element_getattr, /* tp_getattr */
1386 (setattrfunc)element_setattr, /* tp_setattr */
1387 0, /* tp_compare */
1388 (reprfunc)element_repr, /* tp_repr */
1389 0, /* tp_as_number */
1390 &element_as_sequence, /* tp_as_sequence */
1391};
1392
1393/* ==================================================================== */
1394/* the tree builder type */
1395
1396typedef struct {
1397 PyObject_HEAD
1398
1399 PyObject* root; /* root node (first created node) */
1400
1401 ElementObject* this; /* current node */
1402 ElementObject* last; /* most recently created node */
1403
1404 PyObject* data; /* data collector (string or list), or NULL */
1405
1406 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001407 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001408
1409 /* element tracing */
1410 PyObject* events; /* list of events, or NULL if not collecting */
1411 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1412 PyObject* end_event_obj;
1413 PyObject* start_ns_event_obj;
1414 PyObject* end_ns_event_obj;
1415
1416} TreeBuilderObject;
1417
Neal Norwitz227b5332006-03-22 09:28:35 +00001418static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001419
1420#define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type)
1421
1422/* -------------------------------------------------------------------- */
1423/* constructor and destructor */
1424
1425LOCAL(PyObject*)
1426treebuilder_new(void)
1427{
1428 TreeBuilderObject* self;
1429
1430 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1431 if (self == NULL)
1432 return NULL;
1433
1434 self->root = NULL;
1435
1436 Py_INCREF(Py_None);
1437 self->this = (ElementObject*) Py_None;
1438
1439 Py_INCREF(Py_None);
1440 self->last = (ElementObject*) Py_None;
1441
1442 self->data = NULL;
1443
1444 self->stack = PyList_New(20);
1445 self->index = 0;
1446
1447 self->events = NULL;
1448 self->start_event_obj = self->end_event_obj = NULL;
1449 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1450
1451 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1452
1453 return (PyObject*) self;
1454}
1455
1456static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001457treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001458{
1459 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1460 return NULL;
1461
1462 return treebuilder_new();
1463}
1464
1465static void
1466treebuilder_dealloc(TreeBuilderObject* self)
1467{
1468 Py_XDECREF(self->end_ns_event_obj);
1469 Py_XDECREF(self->start_ns_event_obj);
1470 Py_XDECREF(self->end_event_obj);
1471 Py_XDECREF(self->start_event_obj);
1472 Py_XDECREF(self->events);
1473 Py_DECREF(self->stack);
1474 Py_XDECREF(self->data);
1475 Py_DECREF(self->last);
1476 Py_DECREF(self->this);
1477 Py_XDECREF(self->root);
1478
1479 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1480
1481 PyObject_Del(self);
1482}
1483
1484/* -------------------------------------------------------------------- */
1485/* handlers */
1486
1487LOCAL(PyObject*)
1488treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1489 PyObject* standalone)
1490{
1491 Py_RETURN_NONE;
1492}
1493
1494LOCAL(PyObject*)
1495treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1496 PyObject* attrib)
1497{
1498 PyObject* node;
1499 PyObject* this;
1500
1501 if (self->data) {
1502 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001503 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001504 self->last->text = JOIN_SET(
1505 self->data, PyList_CheckExact(self->data)
1506 );
1507 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001508 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001509 self->last->tail = JOIN_SET(
1510 self->data, PyList_CheckExact(self->data)
1511 );
1512 }
1513 self->data = NULL;
1514 }
1515
1516 node = element_new(tag, attrib);
1517 if (!node)
1518 return NULL;
1519
1520 this = (PyObject*) self->this;
1521
1522 if (this != Py_None) {
1523 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001524 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001525 } else {
1526 if (self->root) {
1527 PyErr_SetString(
1528 PyExc_SyntaxError,
1529 "multiple elements on top level"
1530 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001531 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001532 }
1533 Py_INCREF(node);
1534 self->root = node;
1535 }
1536
1537 if (self->index < PyList_GET_SIZE(self->stack)) {
1538 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001539 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001540 Py_INCREF(this);
1541 } else {
1542 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001543 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001544 }
1545 self->index++;
1546
1547 Py_DECREF(this);
1548 Py_INCREF(node);
1549 self->this = (ElementObject*) node;
1550
1551 Py_DECREF(self->last);
1552 Py_INCREF(node);
1553 self->last = (ElementObject*) node;
1554
1555 if (self->start_event_obj) {
1556 PyObject* res;
1557 PyObject* action = self->start_event_obj;
1558 res = PyTuple_New(2);
1559 if (res) {
1560 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1561 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1562 PyList_Append(self->events, res);
1563 Py_DECREF(res);
1564 } else
1565 PyErr_Clear(); /* FIXME: propagate error */
1566 }
1567
1568 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001569
1570 error:
1571 Py_DECREF(node);
1572 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001573}
1574
1575LOCAL(PyObject*)
1576treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1577{
1578 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001579 if (self->last == (ElementObject*) Py_None) {
1580 /* ignore calls to data before the first call to start */
1581 Py_RETURN_NONE;
1582 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001583 /* store the first item as is */
1584 Py_INCREF(data); self->data = data;
1585 } else {
1586 /* more than one item; use a list to collect items */
1587 if (PyString_CheckExact(self->data) && self->data->ob_refcnt == 1 &&
1588 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1589 /* expat often generates single character data sections; handle
1590 the most common case by resizing the existing string... */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001591 Py_ssize_t size = PyString_GET_SIZE(self->data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001592 if (_PyString_Resize(&self->data, size + 1) < 0)
1593 return NULL;
1594 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1595 } else if (PyList_CheckExact(self->data)) {
1596 if (PyList_Append(self->data, data) < 0)
1597 return NULL;
1598 } else {
1599 PyObject* list = PyList_New(2);
1600 if (!list)
1601 return NULL;
1602 PyList_SET_ITEM(list, 0, self->data);
1603 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1604 self->data = list;
1605 }
1606 }
1607
1608 Py_RETURN_NONE;
1609}
1610
1611LOCAL(PyObject*)
1612treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1613{
1614 PyObject* item;
1615
1616 if (self->data) {
1617 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001618 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001619 self->last->text = JOIN_SET(
1620 self->data, PyList_CheckExact(self->data)
1621 );
1622 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001623 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001624 self->last->tail = JOIN_SET(
1625 self->data, PyList_CheckExact(self->data)
1626 );
1627 }
1628 self->data = NULL;
1629 }
1630
1631 if (self->index == 0) {
1632 PyErr_SetString(
1633 PyExc_IndexError,
1634 "pop from empty stack"
1635 );
1636 return NULL;
1637 }
1638
1639 self->index--;
1640
1641 item = PyList_GET_ITEM(self->stack, self->index);
1642 Py_INCREF(item);
1643
1644 Py_DECREF(self->last);
1645
1646 self->last = (ElementObject*) self->this;
1647 self->this = (ElementObject*) item;
1648
1649 if (self->end_event_obj) {
1650 PyObject* res;
1651 PyObject* action = self->end_event_obj;
1652 PyObject* node = (PyObject*) self->last;
1653 res = PyTuple_New(2);
1654 if (res) {
1655 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1656 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1657 PyList_Append(self->events, res);
1658 Py_DECREF(res);
1659 } else
1660 PyErr_Clear(); /* FIXME: propagate error */
1661 }
1662
1663 Py_INCREF(self->last);
1664 return (PyObject*) self->last;
1665}
1666
1667LOCAL(void)
1668treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1669 const char* prefix, const char *uri)
1670{
1671 PyObject* res;
1672 PyObject* action;
1673 PyObject* parcel;
1674
1675 if (!self->events)
1676 return;
1677
1678 if (start) {
1679 if (!self->start_ns_event_obj)
1680 return;
1681 action = self->start_ns_event_obj;
1682 /* FIXME: prefix and uri use utf-8 encoding! */
1683 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1684 if (!parcel)
1685 return;
1686 Py_INCREF(action);
1687 } else {
1688 if (!self->end_ns_event_obj)
1689 return;
1690 action = self->end_ns_event_obj;
1691 Py_INCREF(action);
1692 parcel = Py_None;
1693 Py_INCREF(parcel);
1694 }
1695
1696 res = PyTuple_New(2);
1697
1698 if (res) {
1699 PyTuple_SET_ITEM(res, 0, action);
1700 PyTuple_SET_ITEM(res, 1, parcel);
1701 PyList_Append(self->events, res);
1702 Py_DECREF(res);
1703 } else
1704 PyErr_Clear(); /* FIXME: propagate error */
1705}
1706
1707/* -------------------------------------------------------------------- */
1708/* methods (in alphabetical order) */
1709
1710static PyObject*
1711treebuilder_data(TreeBuilderObject* self, PyObject* args)
1712{
1713 PyObject* data;
1714 if (!PyArg_ParseTuple(args, "O:data", &data))
1715 return NULL;
1716
1717 return treebuilder_handle_data(self, data);
1718}
1719
1720static PyObject*
1721treebuilder_end(TreeBuilderObject* self, PyObject* args)
1722{
1723 PyObject* tag;
1724 if (!PyArg_ParseTuple(args, "O:end", &tag))
1725 return NULL;
1726
1727 return treebuilder_handle_end(self, tag);
1728}
1729
1730LOCAL(PyObject*)
1731treebuilder_done(TreeBuilderObject* self)
1732{
1733 PyObject* res;
1734
1735 /* FIXME: check stack size? */
1736
1737 if (self->root)
1738 res = self->root;
1739 else
1740 res = Py_None;
1741
1742 Py_INCREF(res);
1743 return res;
1744}
1745
1746static PyObject*
1747treebuilder_close(TreeBuilderObject* self, PyObject* args)
1748{
1749 if (!PyArg_ParseTuple(args, ":close"))
1750 return NULL;
1751
1752 return treebuilder_done(self);
1753}
1754
1755static PyObject*
1756treebuilder_start(TreeBuilderObject* self, PyObject* args)
1757{
1758 PyObject* tag;
1759 PyObject* attrib = Py_None;
1760 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1761 return NULL;
1762
1763 return treebuilder_handle_start(self, tag, attrib);
1764}
1765
1766static PyObject*
1767treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1768{
1769 PyObject* encoding;
1770 PyObject* standalone;
1771 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1772 return NULL;
1773
1774 return treebuilder_handle_xml(self, encoding, standalone);
1775}
1776
1777static PyMethodDef treebuilder_methods[] = {
1778 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1779 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1780 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1781 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1782 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1783 {NULL, NULL}
1784};
1785
1786static PyObject*
1787treebuilder_getattr(TreeBuilderObject* self, char* name)
1788{
1789 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1790}
1791
Neal Norwitz227b5332006-03-22 09:28:35 +00001792static PyTypeObject TreeBuilder_Type = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001793 PyObject_HEAD_INIT(NULL)
1794 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
1795 /* methods */
1796 (destructor)treebuilder_dealloc, /* tp_dealloc */
1797 0, /* tp_print */
1798 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1799};
1800
1801/* ==================================================================== */
1802/* the expat interface */
1803
1804#if defined(USE_EXPAT)
1805
1806#include "expat.h"
1807
/* EXPAT(func) names an expat entry point: through the pyexpat dispatch
   table when USE_PYEXPAT_CAPI is defined, otherwise the XML_-prefixed
   library function. */
#ifdef USE_PYEXPAT_CAPI
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
1815
1816typedef struct {
1817 PyObject_HEAD
1818
1819 XML_Parser parser;
1820
1821 PyObject* target;
1822 PyObject* entity;
1823
1824 PyObject* names;
1825
1826 PyObject* handle_xml;
1827 PyObject* handle_start;
1828 PyObject* handle_data;
1829 PyObject* handle_end;
1830
1831 PyObject* handle_comment;
1832 PyObject* handle_pi;
1833
1834} XMLParserObject;
1835
Neal Norwitz227b5332006-03-22 09:28:35 +00001836static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001837
1838/* helpers */
1839
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001840LOCAL(int)
1841checkstring(const char* string, int size)
1842{
1843 int i;
1844
1845 /* check if an 8-bit string contains UTF-8 characters */
1846 for (i = 0; i < size; i++)
1847 if (string[i] & 0x80)
1848 return 1;
1849
1850 return 0;
1851}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001852
1853LOCAL(PyObject*)
1854makestring(const char* string, int size)
1855{
1856 /* convert a UTF-8 string to either a 7-bit ascii string or a
1857 Unicode string */
1858
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001859 if (checkstring(string, size))
1860 return PyUnicode_DecodeUTF8(string, size, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001861
1862 return PyString_FromStringAndSize(string, size);
1863}
1864
1865LOCAL(PyObject*)
1866makeuniversal(XMLParserObject* self, const char* string)
1867{
1868 /* convert a UTF-8 tag/attribute name from the expat parser
1869 to a universal name string */
1870
1871 int size = strlen(string);
1872 PyObject* key;
1873 PyObject* value;
1874
1875 /* look the 'raw' name up in the names dictionary */
1876 key = PyString_FromStringAndSize(string, size);
1877 if (!key)
1878 return NULL;
1879
1880 value = PyDict_GetItem(self->names, key);
1881
1882 if (value) {
1883 Py_INCREF(value);
1884 } else {
1885 /* new name. convert to universal name, and decode as
1886 necessary */
1887
1888 PyObject* tag;
1889 char* p;
1890 int i;
1891
1892 /* look for namespace separator */
1893 for (i = 0; i < size; i++)
1894 if (string[i] == '}')
1895 break;
1896 if (i != size) {
1897 /* convert to universal name */
1898 tag = PyString_FromStringAndSize(NULL, size+1);
1899 p = PyString_AS_STRING(tag);
1900 p[0] = '{';
1901 memcpy(p+1, string, size);
1902 size++;
1903 } else {
1904 /* plain name; use key as tag */
1905 Py_INCREF(key);
1906 tag = key;
1907 }
1908
1909 /* decode universal name */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001910 /* inline makestring, to avoid duplicating the source string if
1911 it's not an utf-8 string */
1912 p = PyString_AS_STRING(tag);
1913 if (checkstring(p, size)) {
1914 value = PyUnicode_DecodeUTF8(p, size, "strict");
1915 Py_DECREF(tag);
1916 if (!value) {
1917 Py_DECREF(key);
1918 return NULL;
1919 }
1920 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001921 value = tag; /* use tag as is */
1922
1923 /* add to names dictionary */
1924 if (PyDict_SetItem(self->names, key, value) < 0) {
1925 Py_DECREF(key);
1926 Py_DECREF(value);
1927 return NULL;
1928 }
1929 }
1930
1931 Py_DECREF(key);
1932 return value;
1933}
1934
1935/* -------------------------------------------------------------------- */
1936/* handlers */
1937
1938static void
1939expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1940 int data_len)
1941{
1942 PyObject* key;
1943 PyObject* value;
1944 PyObject* res;
1945
1946 if (data_len < 2 || data_in[0] != '&')
1947 return;
1948
1949 key = makestring(data_in + 1, data_len - 2);
1950 if (!key)
1951 return;
1952
1953 value = PyDict_GetItem(self->entity, key);
1954
1955 if (value) {
1956 if (TreeBuilder_CheckExact(self->target))
1957 res = treebuilder_handle_data(
1958 (TreeBuilderObject*) self->target, value
1959 );
1960 else if (self->handle_data)
1961 res = PyObject_CallFunction(self->handle_data, "O", value);
1962 else
1963 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001964 Py_XDECREF(res);
1965 } else {
1966 PyErr_Format(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001967 PyExc_SyntaxError, "undefined entity &%s;: line %ld, column %ld",
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001968 PyString_AS_STRING(key),
1969 EXPAT(GetErrorLineNumber)(self->parser),
1970 EXPAT(GetErrorColumnNumber)(self->parser)
1971 );
1972 }
1973
1974 Py_DECREF(key);
1975}
1976
1977static void
1978expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
1979 const XML_Char **attrib_in)
1980{
1981 PyObject* res;
1982 PyObject* tag;
1983 PyObject* attrib;
1984 int ok;
1985
1986 /* tag name */
1987 tag = makeuniversal(self, tag_in);
1988 if (!tag)
1989 return; /* parser will look for errors */
1990
1991 /* attributes */
1992 if (attrib_in[0]) {
1993 attrib = PyDict_New();
1994 if (!attrib)
1995 return;
1996 while (attrib_in[0] && attrib_in[1]) {
1997 PyObject* key = makeuniversal(self, attrib_in[0]);
1998 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
1999 if (!key || !value) {
2000 Py_XDECREF(value);
2001 Py_XDECREF(key);
2002 Py_DECREF(attrib);
2003 return;
2004 }
2005 ok = PyDict_SetItem(attrib, key, value);
2006 Py_DECREF(value);
2007 Py_DECREF(key);
2008 if (ok < 0) {
2009 Py_DECREF(attrib);
2010 return;
2011 }
2012 attrib_in += 2;
2013 }
2014 } else {
2015 Py_INCREF(Py_None);
2016 attrib = Py_None;
2017 }
2018
2019 if (TreeBuilder_CheckExact(self->target))
2020 /* shortcut */
2021 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2022 tag, attrib);
2023 else if (self->handle_start)
2024 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2025 else
2026 res = NULL;
2027
2028 Py_DECREF(tag);
2029 Py_DECREF(attrib);
2030
2031 Py_XDECREF(res);
2032}
2033
2034static void
2035expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2036 int data_len)
2037{
2038 PyObject* data;
2039 PyObject* res;
2040
2041 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002042 if (!data)
2043 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002044
2045 if (TreeBuilder_CheckExact(self->target))
2046 /* shortcut */
2047 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2048 else if (self->handle_data)
2049 res = PyObject_CallFunction(self->handle_data, "O", data);
2050 else
2051 res = NULL;
2052
2053 Py_DECREF(data);
2054
2055 Py_XDECREF(res);
2056}
2057
2058static void
2059expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2060{
2061 PyObject* tag;
2062 PyObject* res = NULL;
2063
2064 if (TreeBuilder_CheckExact(self->target))
2065 /* shortcut */
2066 /* the standard tree builder doesn't look at the end tag */
2067 res = treebuilder_handle_end(
2068 (TreeBuilderObject*) self->target, Py_None
2069 );
2070 else if (self->handle_end) {
2071 tag = makeuniversal(self, tag_in);
2072 if (tag) {
2073 res = PyObject_CallFunction(self->handle_end, "O", tag);
2074 Py_DECREF(tag);
2075 }
2076 }
2077
2078 Py_XDECREF(res);
2079}
2080
2081static void
2082expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2083 const XML_Char *uri)
2084{
2085 treebuilder_handle_namespace(
2086 (TreeBuilderObject*) self->target, 1, prefix, uri
2087 );
2088}
2089
2090static void
2091expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2092{
2093 treebuilder_handle_namespace(
2094 (TreeBuilderObject*) self->target, 0, NULL, NULL
2095 );
2096}
2097
2098static void
2099expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2100{
2101 PyObject* comment;
2102 PyObject* res;
2103
2104 if (self->handle_comment) {
2105 comment = makestring(comment_in, strlen(comment_in));
2106 if (comment) {
2107 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2108 Py_XDECREF(res);
2109 Py_DECREF(comment);
2110 }
2111 }
2112}
2113
2114static void
2115expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2116 const XML_Char* data_in)
2117{
2118 PyObject* target;
2119 PyObject* data;
2120 PyObject* res;
2121
2122 if (self->handle_pi) {
2123 target = makestring(target_in, strlen(target_in));
2124 data = makestring(data_in, strlen(data_in));
2125 if (target && data) {
2126 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2127 Py_XDECREF(res);
2128 Py_DECREF(data);
2129 Py_DECREF(target);
2130 } else {
2131 Py_XDECREF(data);
2132 Py_XDECREF(target);
2133 }
2134 }
2135}
2136
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002137static int
2138expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2139 XML_Encoding *info)
2140{
2141 PyObject* u;
2142 Py_UNICODE* p;
2143 unsigned char s[256];
2144 int i;
2145
2146 memset(info, 0, sizeof(XML_Encoding));
2147
2148 for (i = 0; i < 256; i++)
2149 s[i] = i;
2150
Fredrik Lundhc3389992005-12-25 11:40:19 +00002151 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002152 if (!u)
2153 return XML_STATUS_ERROR;
2154
2155 if (PyUnicode_GET_SIZE(u) != 256) {
2156 Py_DECREF(u);
2157 return XML_STATUS_ERROR;
2158 }
2159
2160 p = PyUnicode_AS_UNICODE(u);
2161
2162 for (i = 0; i < 256; i++) {
2163 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2164 info->map[i] = p[i];
2165 else
2166 info->map[i] = -1;
2167 }
2168
2169 Py_DECREF(u);
2170
2171 return XML_STATUS_OK;
2172}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002173
2174/* -------------------------------------------------------------------- */
2175/* constructor and destructor */
2176
2177static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002178xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002179{
2180 XMLParserObject* self;
2181 /* FIXME: does this need to be static? */
2182 static XML_Memory_Handling_Suite memory_handler;
2183
2184 PyObject* target = NULL;
2185 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002186 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002187 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2188 &target, &encoding))
2189 return NULL;
2190
2191#if defined(USE_PYEXPAT_CAPI)
2192 if (!expat_capi) {
2193 PyErr_SetString(
2194 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2195 );
2196 return NULL;
2197 }
2198#endif
2199
2200 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2201 if (self == NULL)
2202 return NULL;
2203
2204 self->entity = PyDict_New();
2205 if (!self->entity) {
2206 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002207 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002208 }
2209
2210 self->names = PyDict_New();
2211 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002212 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002213 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002214 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002215 }
2216
2217 memory_handler.malloc_fcn = PyObject_Malloc;
2218 memory_handler.realloc_fcn = PyObject_Realloc;
2219 memory_handler.free_fcn = PyObject_Free;
2220
2221 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2222 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002223 PyObject_Del(self->names);
2224 PyObject_Del(self->entity);
2225 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002226 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002227 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002228 }
2229
2230 /* setup target handlers */
2231 if (!target) {
2232 target = treebuilder_new();
2233 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002234 EXPAT(ParserFree)(self->parser);
2235 PyObject_Del(self->names);
2236 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002237 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002238 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002239 }
2240 } else
2241 Py_INCREF(target);
2242 self->target = target;
2243
2244 self->handle_xml = PyObject_GetAttrString(target, "xml");
2245 self->handle_start = PyObject_GetAttrString(target, "start");
2246 self->handle_data = PyObject_GetAttrString(target, "data");
2247 self->handle_end = PyObject_GetAttrString(target, "end");
2248 self->handle_comment = PyObject_GetAttrString(target, "comment");
2249 self->handle_pi = PyObject_GetAttrString(target, "pi");
2250
2251 PyErr_Clear();
2252
2253 /* configure parser */
2254 EXPAT(SetUserData)(self->parser, self);
2255 EXPAT(SetElementHandler)(
2256 self->parser,
2257 (XML_StartElementHandler) expat_start_handler,
2258 (XML_EndElementHandler) expat_end_handler
2259 );
2260 EXPAT(SetDefaultHandlerExpand)(
2261 self->parser,
2262 (XML_DefaultHandler) expat_default_handler
2263 );
2264 EXPAT(SetCharacterDataHandler)(
2265 self->parser,
2266 (XML_CharacterDataHandler) expat_data_handler
2267 );
2268 if (self->handle_comment)
2269 EXPAT(SetCommentHandler)(
2270 self->parser,
2271 (XML_CommentHandler) expat_comment_handler
2272 );
2273 if (self->handle_pi)
2274 EXPAT(SetProcessingInstructionHandler)(
2275 self->parser,
2276 (XML_ProcessingInstructionHandler) expat_pi_handler
2277 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002278 EXPAT(SetUnknownEncodingHandler)(
2279 self->parser,
2280 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2281 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002282
2283 ALLOC(sizeof(XMLParserObject), "create expatparser");
2284
2285 return (PyObject*) self;
2286}
2287
2288static void
2289xmlparser_dealloc(XMLParserObject* self)
2290{
2291 EXPAT(ParserFree)(self->parser);
2292
2293 Py_XDECREF(self->handle_pi);
2294 Py_XDECREF(self->handle_comment);
2295 Py_XDECREF(self->handle_end);
2296 Py_XDECREF(self->handle_data);
2297 Py_XDECREF(self->handle_start);
2298 Py_XDECREF(self->handle_xml);
2299
2300 Py_DECREF(self->target);
2301 Py_DECREF(self->entity);
2302 Py_DECREF(self->names);
2303
2304 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2305
2306 PyObject_Del(self);
2307}
2308
2309/* -------------------------------------------------------------------- */
2310/* methods (in alphabetical order) */
2311
2312LOCAL(PyObject*)
2313expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2314{
2315 int ok;
2316
2317 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2318
2319 if (PyErr_Occurred())
2320 return NULL;
2321
2322 if (!ok) {
2323 PyErr_Format(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002324 PyExc_SyntaxError, "%s: line %ld, column %ld",
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002325 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2326 EXPAT(GetErrorLineNumber)(self->parser),
2327 EXPAT(GetErrorColumnNumber)(self->parser)
2328 );
2329 return NULL;
2330 }
2331
2332 Py_RETURN_NONE;
2333}
2334
2335static PyObject*
2336xmlparser_close(XMLParserObject* self, PyObject* args)
2337{
2338 /* end feeding data to parser */
2339
2340 PyObject* res;
2341 if (!PyArg_ParseTuple(args, ":close"))
2342 return NULL;
2343
2344 res = expat_parse(self, "", 0, 1);
2345
2346 if (res && TreeBuilder_CheckExact(self->target)) {
2347 Py_DECREF(res);
2348 return treebuilder_done((TreeBuilderObject*) self->target);
2349 }
2350
2351 return res;
2352}
2353
2354static PyObject*
2355xmlparser_feed(XMLParserObject* self, PyObject* args)
2356{
2357 /* feed data to parser */
2358
2359 char* data;
2360 int data_len;
2361 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2362 return NULL;
2363
2364 return expat_parse(self, data, data_len, 0);
2365}
2366
2367static PyObject*
2368xmlparser_parse(XMLParserObject* self, PyObject* args)
2369{
2370 /* (internal) parse until end of input stream */
2371
2372 PyObject* reader;
2373 PyObject* buffer;
2374 PyObject* res;
2375
2376 PyObject* fileobj;
2377 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2378 return NULL;
2379
2380 reader = PyObject_GetAttrString(fileobj, "read");
2381 if (!reader)
2382 return NULL;
2383
2384 /* read from open file object */
2385 for (;;) {
2386
2387 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2388
2389 if (!buffer) {
2390 /* read failed (e.g. due to KeyboardInterrupt) */
2391 Py_DECREF(reader);
2392 return NULL;
2393 }
2394
2395 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2396 Py_DECREF(buffer);
2397 break;
2398 }
2399
2400 res = expat_parse(
2401 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2402 );
2403
2404 Py_DECREF(buffer);
2405
2406 if (!res) {
2407 Py_DECREF(reader);
2408 return NULL;
2409 }
2410 Py_DECREF(res);
2411
2412 }
2413
2414 Py_DECREF(reader);
2415
2416 res = expat_parse(self, "", 0, 1);
2417
2418 if (res && TreeBuilder_CheckExact(self->target)) {
2419 Py_DECREF(res);
2420 return treebuilder_done((TreeBuilderObject*) self->target);
2421 }
2422
2423 return res;
2424}
2425
2426static PyObject*
2427xmlparser_setevents(XMLParserObject* self, PyObject* args)
2428{
2429 /* activate element event reporting */
2430
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002431 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002432 TreeBuilderObject* target;
2433
2434 PyObject* events; /* event collector */
2435 PyObject* event_set = Py_None;
2436 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2437 &event_set))
2438 return NULL;
2439
2440 if (!TreeBuilder_CheckExact(self->target)) {
2441 PyErr_SetString(
2442 PyExc_TypeError,
2443 "event handling only supported for cElementTree.Treebuilder "
2444 "targets"
2445 );
2446 return NULL;
2447 }
2448
2449 target = (TreeBuilderObject*) self->target;
2450
2451 Py_INCREF(events);
2452 Py_XDECREF(target->events);
2453 target->events = events;
2454
2455 /* clear out existing events */
2456 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2457 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2458 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2459 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2460
2461 if (event_set == Py_None) {
2462 /* default is "end" only */
2463 target->end_event_obj = PyString_FromString("end");
2464 Py_RETURN_NONE;
2465 }
2466
2467 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2468 goto error;
2469
2470 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2471 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2472 char* event;
2473 if (!PyString_Check(item))
2474 goto error;
2475 event = PyString_AS_STRING(item);
2476 if (strcmp(event, "start") == 0) {
2477 Py_INCREF(item);
2478 target->start_event_obj = item;
2479 } else if (strcmp(event, "end") == 0) {
2480 Py_INCREF(item);
2481 Py_XDECREF(target->end_event_obj);
2482 target->end_event_obj = item;
2483 } else if (strcmp(event, "start-ns") == 0) {
2484 Py_INCREF(item);
2485 Py_XDECREF(target->start_ns_event_obj);
2486 target->start_ns_event_obj = item;
2487 EXPAT(SetNamespaceDeclHandler)(
2488 self->parser,
2489 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2490 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2491 );
2492 } else if (strcmp(event, "end-ns") == 0) {
2493 Py_INCREF(item);
2494 Py_XDECREF(target->end_ns_event_obj);
2495 target->end_ns_event_obj = item;
2496 EXPAT(SetNamespaceDeclHandler)(
2497 self->parser,
2498 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2499 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2500 );
2501 } else {
2502 PyErr_Format(
2503 PyExc_ValueError,
2504 "unknown event '%s'", event
2505 );
2506 return NULL;
2507 }
2508 }
2509
2510 Py_RETURN_NONE;
2511
2512 error:
2513 PyErr_SetString(
2514 PyExc_TypeError,
2515 "invalid event tuple"
2516 );
2517 return NULL;
2518}
2519
2520static PyMethodDef xmlparser_methods[] = {
2521 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2522 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2523 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2524 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2525 {NULL, NULL}
2526};
2527
2528static PyObject*
2529xmlparser_getattr(XMLParserObject* self, char* name)
2530{
2531 PyObject* res;
2532
2533 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2534 if (res)
2535 return res;
2536
2537 PyErr_Clear();
2538
2539 if (strcmp(name, "entity") == 0)
2540 res = self->entity;
2541 else if (strcmp(name, "target") == 0)
2542 res = self->target;
2543 else if (strcmp(name, "version") == 0) {
2544 char buffer[100];
2545 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2546 XML_MINOR_VERSION, XML_MICRO_VERSION);
2547 return PyString_FromString(buffer);
2548 } else {
2549 PyErr_SetString(PyExc_AttributeError, name);
2550 return NULL;
2551 }
2552
2553 Py_INCREF(res);
2554 return res;
2555}
2556
Neal Norwitz227b5332006-03-22 09:28:35 +00002557static PyTypeObject XMLParser_Type = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002558 PyObject_HEAD_INIT(NULL)
2559 0, "XMLParser", sizeof(XMLParserObject), 0,
2560 /* methods */
2561 (destructor)xmlparser_dealloc, /* tp_dealloc */
2562 0, /* tp_print */
2563 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2564};
2565
2566#endif
2567
2568/* ==================================================================== */
2569/* python module interface */
2570
2571static PyMethodDef _functions[] = {
2572 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2573 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2574 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2575#if defined(USE_EXPAT)
2576 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2577 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2578#endif
2579 {NULL, NULL}
2580};
2581
Neal Norwitzf6657e62006-12-28 04:47:50 +00002582PyMODINIT_FUNC
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002583init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002584{
2585 PyObject* m;
2586 PyObject* g;
2587 char* bootstrap;
2588#if defined(USE_PYEXPAT_CAPI)
2589 struct PyExpat_CAPI* capi;
2590#endif
2591
2592 /* Patch object type */
2593 Element_Type.ob_type = TreeBuilder_Type.ob_type = &PyType_Type;
2594#if defined(USE_EXPAT)
2595 XMLParser_Type.ob_type = &PyType_Type;
2596#endif
2597
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002598 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002599 if (!m)
2600 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002601
2602 /* python glue code */
2603
2604 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002605 if (!g)
2606 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002607
2608 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2609
2610 bootstrap = (
2611
2612#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2613 "from __future__ import generators\n" /* enable yield under 2.2 */
2614#endif
2615
2616 "from copy import copy, deepcopy\n"
2617
2618 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002619 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002620 "except ImportError:\n"
2621 " import ElementTree\n"
2622 "ET = ElementTree\n"
2623 "del ElementTree\n"
2624
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002625 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002626
2627 "try:\n" /* check if copy works as is */
2628 " copy(cElementTree.Element('x'))\n"
2629 "except:\n"
2630 " def copyelement(elem):\n"
2631 " return elem\n"
2632
2633 "def Comment(text=None):\n" /* public */
2634 " element = cElementTree.Element(ET.Comment)\n"
2635 " element.text = text\n"
2636 " return element\n"
2637 "cElementTree.Comment = Comment\n"
2638
2639 "class ElementTree(ET.ElementTree):\n" /* public */
2640 " def parse(self, source, parser=None):\n"
2641 " if not hasattr(source, 'read'):\n"
2642 " source = open(source, 'rb')\n"
2643 " if parser is not None:\n"
2644 " while 1:\n"
2645 " data = source.read(65536)\n"
2646 " if not data:\n"
2647 " break\n"
2648 " parser.feed(data)\n"
2649 " self._root = parser.close()\n"
2650 " else:\n"
2651 " parser = cElementTree.XMLParser()\n"
2652 " self._root = parser._parse(source)\n"
2653 " return self._root\n"
2654 "cElementTree.ElementTree = ElementTree\n"
2655
2656 "def getiterator(node, tag=None):\n" /* helper */
2657 " if tag == '*':\n"
2658 " tag = None\n"
2659#if (PY_VERSION_HEX < 0x02020000)
2660 " nodes = []\n" /* 2.1 doesn't have yield */
2661 " if tag is None or node.tag == tag:\n"
2662 " nodes.append(node)\n"
2663 " for node in node:\n"
2664 " nodes.extend(getiterator(node, tag))\n"
2665 " return nodes\n"
2666#else
2667 " if tag is None or node.tag == tag:\n"
2668 " yield node\n"
2669 " for node in node:\n"
2670 " for node in getiterator(node, tag):\n"
2671 " yield node\n"
2672#endif
2673
2674 "def parse(source, parser=None):\n" /* public */
2675 " tree = ElementTree()\n"
2676 " tree.parse(source, parser)\n"
2677 " return tree\n"
2678 "cElementTree.parse = parse\n"
2679
2680#if (PY_VERSION_HEX < 0x02020000)
2681 "if hasattr(ET, 'iterparse'):\n"
2682 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2683#else
2684 "class iterparse(object):\n"
2685 " root = None\n"
2686 " def __init__(self, file, events=None):\n"
2687 " if not hasattr(file, 'read'):\n"
2688 " file = open(file, 'rb')\n"
2689 " self._file = file\n"
2690 " self._events = events\n"
2691 " def __iter__(self):\n"
2692 " events = []\n"
2693 " b = cElementTree.TreeBuilder()\n"
2694 " p = cElementTree.XMLParser(b)\n"
2695 " p._setevents(events, self._events)\n"
2696 " while 1:\n"
2697 " data = self._file.read(16384)\n"
2698 " if not data:\n"
2699 " break\n"
2700 " p.feed(data)\n"
2701 " for event in events:\n"
2702 " yield event\n"
2703 " del events[:]\n"
2704 " root = p.close()\n"
2705 " for event in events:\n"
2706 " yield event\n"
2707 " self.root = root\n"
2708 "cElementTree.iterparse = iterparse\n"
2709#endif
2710
2711 "def PI(target, text=None):\n" /* public */
2712 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2713 " element.text = target\n"
2714 " if text:\n"
2715 " element.text = element.text + ' ' + text\n"
2716 " return element\n"
2717
2718 " elem = cElementTree.Element(ET.PI)\n"
2719 " elem.text = text\n"
2720 " return elem\n"
2721 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2722
2723 "def XML(text):\n" /* public */
2724 " parser = cElementTree.XMLParser()\n"
2725 " parser.feed(text)\n"
2726 " return parser.close()\n"
2727 "cElementTree.XML = cElementTree.fromstring = XML\n"
2728
2729 "def XMLID(text):\n" /* public */
2730 " tree = XML(text)\n"
2731 " ids = {}\n"
2732 " for elem in tree.getiterator():\n"
2733 " id = elem.get('id')\n"
2734 " if id:\n"
2735 " ids[id] = elem\n"
2736 " return tree, ids\n"
2737 "cElementTree.XMLID = XMLID\n"
2738
2739 "cElementTree.dump = ET.dump\n"
2740 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2741 "cElementTree.iselement = ET.iselement\n"
2742 "cElementTree.QName = ET.QName\n"
2743 "cElementTree.tostring = ET.tostring\n"
2744 "cElementTree.VERSION = '" VERSION "'\n"
2745 "cElementTree.__version__ = '" VERSION "'\n"
2746 "cElementTree.XMLParserError = SyntaxError\n"
2747
2748 );
2749
2750 PyRun_String(bootstrap, Py_file_input, g, NULL);
2751
2752 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2753
2754 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2755 if (elementtree_copyelement_obj) {
2756 /* reduce hack needed; enable reduce method */
2757 PyMethodDef* mp;
2758 for (mp = element_methods; mp->ml_name; mp++)
2759 if (mp->ml_meth == (PyCFunction) element_reduce) {
2760 mp->ml_name = "__reduce__";
2761 break;
2762 }
2763 } else
2764 PyErr_Clear();
2765 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2766 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2767
2768#if defined(USE_PYEXPAT_CAPI)
2769 /* link against pyexpat, if possible */
2770 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2771 if (capi &&
2772 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2773 capi->size <= sizeof(*expat_capi) &&
2774 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2775 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2776 capi->MICRO_VERSION == XML_MICRO_VERSION)
2777 expat_capi = capi;
2778 else
2779 expat_capi = NULL;
2780#endif
2781
2782}