blob: 2e10d5549dc3f54c3a2f8d20eb3cc931898bd52c [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000036 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000038 *
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000039 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000041 *
42 * info@pythonware.com
43 * http://www.pythonware.com
44 */
45
Fredrik Lundh6d52b552005-12-16 22:06:43 +000046/* Licensed to PSF under a Contributor Agreement. */
47/* See http://www.python.org/2.4/license for licensing details. */
48
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000049#include "Python.h"
50
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000051#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
53/* -------------------------------------------------------------------- */
54/* configuration */
55
56/* Leave defined to include the expat-based XMLParser type */
57#define USE_EXPAT
58
/* Define to do all expat calls via pyexpat's embedded expat library */
60/* #define USE_PYEXPAT_CAPI */
61
62/* An element can hold this many children without extra memory
63 allocations. */
64#define STATIC_CHILDREN 4
65
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
70
71/* Also note that pymalloc always allocates blocks in multiples of
72 eight bytes. For the current version of cElementTree, this means
73 that the number of children should be an even number, at least on
74 32-bit platforms. */
75
76/* -------------------------------------------------------------------- */
77
78#if 0
79static int memory = 0;
80#define ALLOC(size, comment)\
81do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
82#define RELEASE(size, comment)\
83do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
84#else
85#define ALLOC(size, comment)
86#define RELEASE(size, comment)
87#endif
88
89/* compiler tweaks */
90#if defined(_MSC_VER)
91#define LOCAL(type) static __inline type __fastcall
92#else
93#define LOCAL(type) static type
94#endif
95
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000096/* macros used to store 'join' flags in string object pointers. note
97 that all use of text and tail as object pointers must be wrapped in
98 JOIN_OBJ. see comments in the ElementObject definition for more
99 info. */
/* JOIN_GET: read the join flag kept in the lowest bit of a tagged pointer */
#define JOIN_GET(p) (((Py_uintptr_t) (p)) & 1)
/* JOIN_SET: drop any flag already present on p, then store `flag` in bit 0 */
#define JOIN_SET(p, flag) ((void*) (((Py_uintptr_t) JOIN_OBJ(p)) | (flag)))
/* JOIN_OBJ: mask off the flag bit to recover the plain object pointer */
#define JOIN_OBJ(p) ((PyObject*) (((Py_uintptr_t) (p)) & ~1))
103
104/* glue functions (see the init function for details) */
105static PyObject* elementtree_copyelement_obj;
106static PyObject* elementtree_deepcopy_obj;
107static PyObject* elementtree_getiterator_obj;
108static PyObject* elementpath_obj;
109
110/* helpers */
111
112LOCAL(PyObject*)
113deepcopy(PyObject* object, PyObject* memo)
114{
115 /* do a deep copy of the given object */
116
117 PyObject* args;
118 PyObject* result;
119
120 if (!elementtree_deepcopy_obj) {
121 PyErr_SetString(
122 PyExc_RuntimeError,
123 "deepcopy helper not found"
124 );
125 return NULL;
126 }
127
128 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000129 if (!args)
130 return NULL;
131
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
133 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
134
135 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
136
137 Py_DECREF(args);
138
139 return result;
140}
141
142LOCAL(PyObject*)
143list_join(PyObject* list)
144{
145 /* join list elements (destroying the list in the process) */
146
147 PyObject* joiner;
148 PyObject* function;
149 PyObject* args;
150 PyObject* result;
151
152 switch (PyList_GET_SIZE(list)) {
153 case 0:
154 Py_DECREF(list);
155 return PyString_FromString("");
156 case 1:
157 result = PyList_GET_ITEM(list, 0);
158 Py_INCREF(result);
159 Py_DECREF(list);
160 return result;
161 }
162
163 /* two or more elements: slice out a suitable separator from the
164 first member, and use that to join the entire list */
165
166 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
167 if (!joiner)
168 return NULL;
169
170 function = PyObject_GetAttrString(joiner, "join");
171 if (!function) {
172 Py_DECREF(joiner);
173 return NULL;
174 }
175
176 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000177 if (!args)
178 return NULL;
179
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000180 PyTuple_SET_ITEM(args, 0, list);
181
182 result = PyObject_CallObject(function, args);
183
184 Py_DECREF(args); /* also removes list */
185 Py_DECREF(function);
186 Py_DECREF(joiner);
187
188 return result;
189}
190
191#if (PY_VERSION_HEX < 0x02020000)
192LOCAL(int)
193PyDict_Update(PyObject* dict, PyObject* other)
194{
195 /* PyDict_Update emulation for 2.1 and earlier */
196
197 PyObject* res;
198
199 res = PyObject_CallMethod(dict, "update", "O", other);
200 if (!res)
201 return -1;
202
203 Py_DECREF(res);
204 return 0;
205}
206#endif
207
208/* -------------------------------------------------------------------- */
209/* the element type */
210
211typedef struct {
212
213 /* attributes (a dictionary object), or None if no attributes */
214 PyObject* attrib;
215
216 /* child elements */
217 int length; /* actual number of items */
218 int allocated; /* allocated items */
219
220 /* this either points to _children or to a malloced buffer */
221 PyObject* *children;
222
223 PyObject* _children[STATIC_CHILDREN];
224
225} ElementObjectExtra;
226
227typedef struct {
228 PyObject_HEAD
229
230 /* element tag (a string). */
231 PyObject* tag;
232
233 /* text before first child. note that this is a tagged pointer;
234 use JOIN_OBJ to get the object pointer. the join flag is used
235 to distinguish lists created by the tree builder from lists
236 assigned to the attribute by application code; the former
237 should be joined before being returned to the user, the latter
238 should be left intact. */
239 PyObject* text;
240
241 /* text after this element, in parent. note that this is a tagged
242 pointer; use JOIN_OBJ to get the object pointer. */
243 PyObject* tail;
244
245 ElementObjectExtra* extra;
246
247} ElementObject;
248
Neal Norwitz227b5332006-03-22 09:28:35 +0000249static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000250
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000251#define Element_CheckExact(op) (Py_Type(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000252
253/* -------------------------------------------------------------------- */
254/* element constructor and destructor */
255
256LOCAL(int)
257element_new_extra(ElementObject* self, PyObject* attrib)
258{
259 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
260 if (!self->extra)
261 return -1;
262
263 if (!attrib)
264 attrib = Py_None;
265
266 Py_INCREF(attrib);
267 self->extra->attrib = attrib;
268
269 self->extra->length = 0;
270 self->extra->allocated = STATIC_CHILDREN;
271 self->extra->children = self->extra->_children;
272
273 return 0;
274}
275
276LOCAL(void)
277element_dealloc_extra(ElementObject* self)
278{
279 int i;
280
281 Py_DECREF(self->extra->attrib);
282
283 for (i = 0; i < self->extra->length; i++)
284 Py_DECREF(self->extra->children[i]);
285
286 if (self->extra->children != self->extra->_children)
287 PyObject_Free(self->extra->children);
288
289 PyObject_Free(self->extra);
290}
291
292LOCAL(PyObject*)
293element_new(PyObject* tag, PyObject* attrib)
294{
295 ElementObject* self;
296
297 self = PyObject_New(ElementObject, &Element_Type);
298 if (self == NULL)
299 return NULL;
300
301 /* use None for empty dictionaries */
302 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
303 attrib = Py_None;
304
305 self->extra = NULL;
306
307 if (attrib != Py_None) {
308
Thomas Wouters477c8d52006-05-27 19:21:47 +0000309 if (element_new_extra(self, attrib) < 0) {
310 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000312 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000313
314 self->extra->length = 0;
315 self->extra->allocated = STATIC_CHILDREN;
316 self->extra->children = self->extra->_children;
317
318 }
319
320 Py_INCREF(tag);
321 self->tag = tag;
322
323 Py_INCREF(Py_None);
324 self->text = Py_None;
325
326 Py_INCREF(Py_None);
327 self->tail = Py_None;
328
329 ALLOC(sizeof(ElementObject), "create element");
330
331 return (PyObject*) self;
332}
333
334LOCAL(int)
335element_resize(ElementObject* self, int extra)
336{
337 int size;
338 PyObject* *children;
339
340 /* make sure self->children can hold the given number of extra
341 elements. set an exception and return -1 if allocation failed */
342
343 if (!self->extra)
344 element_new_extra(self, NULL);
345
346 size = self->extra->length + extra;
347
348 if (size > self->extra->allocated) {
349 /* use Python 2.4's list growth strategy */
350 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
351 if (self->extra->children != self->extra->_children) {
352 children = PyObject_Realloc(self->extra->children,
353 size * sizeof(PyObject*));
354 if (!children)
355 goto nomemory;
356 } else {
357 children = PyObject_Malloc(size * sizeof(PyObject*));
358 if (!children)
359 goto nomemory;
360 /* copy existing children from static area to malloc buffer */
361 memcpy(children, self->extra->children,
362 self->extra->length * sizeof(PyObject*));
363 }
364 self->extra->children = children;
365 self->extra->allocated = size;
366 }
367
368 return 0;
369
370 nomemory:
371 PyErr_NoMemory();
372 return -1;
373}
374
375LOCAL(int)
376element_add_subelement(ElementObject* self, PyObject* element)
377{
378 /* add a child element to a parent */
379
380 if (element_resize(self, 1) < 0)
381 return -1;
382
383 Py_INCREF(element);
384 self->extra->children[self->extra->length] = element;
385
386 self->extra->length++;
387
388 return 0;
389}
390
391LOCAL(PyObject*)
392element_get_attrib(ElementObject* self)
393{
394 /* return borrowed reference to attrib dictionary */
395 /* note: this function assumes that the extra section exists */
396
397 PyObject* res = self->extra->attrib;
398
399 if (res == Py_None) {
400 /* create missing dictionary */
401 res = PyDict_New();
402 if (!res)
403 return NULL;
404 self->extra->attrib = res;
405 }
406
407 return res;
408}
409
410LOCAL(PyObject*)
411element_get_text(ElementObject* self)
412{
413 /* return borrowed reference to text attribute */
414
415 PyObject* res = self->text;
416
417 if (JOIN_GET(res)) {
418 res = JOIN_OBJ(res);
419 if (PyList_CheckExact(res)) {
420 res = list_join(res);
421 if (!res)
422 return NULL;
423 self->text = res;
424 }
425 }
426
427 return res;
428}
429
430LOCAL(PyObject*)
431element_get_tail(ElementObject* self)
432{
433 /* return borrowed reference to text attribute */
434
435 PyObject* res = self->tail;
436
437 if (JOIN_GET(res)) {
438 res = JOIN_OBJ(res);
439 if (PyList_CheckExact(res)) {
440 res = list_join(res);
441 if (!res)
442 return NULL;
443 self->tail = res;
444 }
445 }
446
447 return res;
448}
449
450static PyObject*
451element(PyObject* self, PyObject* args, PyObject* kw)
452{
453 PyObject* elem;
454
455 PyObject* tag;
456 PyObject* attrib = NULL;
457 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
458 &PyDict_Type, &attrib))
459 return NULL;
460
461 if (attrib || kw) {
462 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
463 if (!attrib)
464 return NULL;
465 if (kw)
466 PyDict_Update(attrib, kw);
467 } else {
468 Py_INCREF(Py_None);
469 attrib = Py_None;
470 }
471
472 elem = element_new(tag, attrib);
473
474 Py_DECREF(attrib);
475
476 return elem;
477}
478
479static PyObject*
480subelement(PyObject* self, PyObject* args, PyObject* kw)
481{
482 PyObject* elem;
483
484 ElementObject* parent;
485 PyObject* tag;
486 PyObject* attrib = NULL;
487 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
488 &Element_Type, &parent, &tag,
489 &PyDict_Type, &attrib))
490 return NULL;
491
492 if (attrib || kw) {
493 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
494 if (!attrib)
495 return NULL;
496 if (kw)
497 PyDict_Update(attrib, kw);
498 } else {
499 Py_INCREF(Py_None);
500 attrib = Py_None;
501 }
502
503 elem = element_new(tag, attrib);
504
505 Py_DECREF(attrib);
506
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000507 if (element_add_subelement(parent, elem) < 0) {
508 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000509 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000510 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000511
512 return elem;
513}
514
515static void
516element_dealloc(ElementObject* self)
517{
518 if (self->extra)
519 element_dealloc_extra(self);
520
521 /* discard attributes */
522 Py_DECREF(self->tag);
523 Py_DECREF(JOIN_OBJ(self->text));
524 Py_DECREF(JOIN_OBJ(self->tail));
525
526 RELEASE(sizeof(ElementObject), "destroy element");
527
528 PyObject_Del(self);
529}
530
531/* -------------------------------------------------------------------- */
532/* methods (in alphabetical order) */
533
534static PyObject*
535element_append(ElementObject* self, PyObject* args)
536{
537 PyObject* element;
538 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
539 return NULL;
540
541 if (element_add_subelement(self, element) < 0)
542 return NULL;
543
544 Py_RETURN_NONE;
545}
546
547static PyObject*
548element_clear(ElementObject* self, PyObject* args)
549{
550 if (!PyArg_ParseTuple(args, ":clear"))
551 return NULL;
552
553 if (self->extra) {
554 element_dealloc_extra(self);
555 self->extra = NULL;
556 }
557
558 Py_INCREF(Py_None);
559 Py_DECREF(JOIN_OBJ(self->text));
560 self->text = Py_None;
561
562 Py_INCREF(Py_None);
563 Py_DECREF(JOIN_OBJ(self->tail));
564 self->tail = Py_None;
565
566 Py_RETURN_NONE;
567}
568
569static PyObject*
570element_copy(ElementObject* self, PyObject* args)
571{
572 int i;
573 ElementObject* element;
574
575 if (!PyArg_ParseTuple(args, ":__copy__"))
576 return NULL;
577
578 element = (ElementObject*) element_new(
579 self->tag, (self->extra) ? self->extra->attrib : Py_None
580 );
581 if (!element)
582 return NULL;
583
584 Py_DECREF(JOIN_OBJ(element->text));
585 element->text = self->text;
586 Py_INCREF(JOIN_OBJ(element->text));
587
588 Py_DECREF(JOIN_OBJ(element->tail));
589 element->tail = self->tail;
590 Py_INCREF(JOIN_OBJ(element->tail));
591
592 if (self->extra) {
593
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000594 if (element_resize(element, self->extra->length) < 0) {
595 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000596 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000597 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000598
599 for (i = 0; i < self->extra->length; i++) {
600 Py_INCREF(self->extra->children[i]);
601 element->extra->children[i] = self->extra->children[i];
602 }
603
604 element->extra->length = self->extra->length;
605
606 }
607
608 return (PyObject*) element;
609}
610
611static PyObject*
612element_deepcopy(ElementObject* self, PyObject* args)
613{
614 int i;
615 ElementObject* element;
616 PyObject* tag;
617 PyObject* attrib;
618 PyObject* text;
619 PyObject* tail;
620 PyObject* id;
621
622 PyObject* memo;
623 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
624 return NULL;
625
626 tag = deepcopy(self->tag, memo);
627 if (!tag)
628 return NULL;
629
630 if (self->extra) {
631 attrib = deepcopy(self->extra->attrib, memo);
632 if (!attrib) {
633 Py_DECREF(tag);
634 return NULL;
635 }
636 } else {
637 Py_INCREF(Py_None);
638 attrib = Py_None;
639 }
640
641 element = (ElementObject*) element_new(tag, attrib);
642
643 Py_DECREF(tag);
644 Py_DECREF(attrib);
645
646 if (!element)
647 return NULL;
648
649 text = deepcopy(JOIN_OBJ(self->text), memo);
650 if (!text)
651 goto error;
652 Py_DECREF(element->text);
653 element->text = JOIN_SET(text, JOIN_GET(self->text));
654
655 tail = deepcopy(JOIN_OBJ(self->tail), memo);
656 if (!tail)
657 goto error;
658 Py_DECREF(element->tail);
659 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
660
661 if (self->extra) {
662
663 if (element_resize(element, self->extra->length) < 0)
664 goto error;
665
666 for (i = 0; i < self->extra->length; i++) {
667 PyObject* child = deepcopy(self->extra->children[i], memo);
668 if (!child) {
669 element->extra->length = i;
670 goto error;
671 }
672 element->extra->children[i] = child;
673 }
674
675 element->extra->length = self->extra->length;
676
677 }
678
679 /* add object to memo dictionary (so deepcopy won't visit it again) */
680 id = PyInt_FromLong((Py_uintptr_t) self);
681
682 i = PyDict_SetItem(memo, id, (PyObject*) element);
683
684 Py_DECREF(id);
685
686 if (i < 0)
687 goto error;
688
689 return (PyObject*) element;
690
691 error:
692 Py_DECREF(element);
693 return NULL;
694}
695
696LOCAL(int)
697checkpath(PyObject* tag)
698{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000699 Py_ssize_t i;
700 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701
702 /* check if a tag contains an xpath character */
703
704#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
705
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000706 if (PyUnicode_Check(tag)) {
707 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
708 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
709 if (p[i] == '{')
710 check = 0;
711 else if (p[i] == '}')
712 check = 1;
713 else if (check && PATHCHAR(p[i]))
714 return 1;
715 }
716 return 0;
717 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000718 if (PyString_Check(tag)) {
719 char *p = PyString_AS_STRING(tag);
720 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
721 if (p[i] == '{')
722 check = 0;
723 else if (p[i] == '}')
724 check = 1;
725 else if (check && PATHCHAR(p[i]))
726 return 1;
727 }
728 return 0;
729 }
730
731 return 1; /* unknown type; might be path expression */
732}
733
734static PyObject*
735element_find(ElementObject* self, PyObject* args)
736{
737 int i;
738
739 PyObject* tag;
740 if (!PyArg_ParseTuple(args, "O:find", &tag))
741 return NULL;
742
743 if (checkpath(tag))
744 return PyObject_CallMethod(
745 elementpath_obj, "find", "OO", self, tag
746 );
747
748 if (!self->extra)
749 Py_RETURN_NONE;
750
751 for (i = 0; i < self->extra->length; i++) {
752 PyObject* item = self->extra->children[i];
753 if (Element_CheckExact(item) &&
754 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
755 Py_INCREF(item);
756 return item;
757 }
758 }
759
760 Py_RETURN_NONE;
761}
762
763static PyObject*
764element_findtext(ElementObject* self, PyObject* args)
765{
766 int i;
767
768 PyObject* tag;
769 PyObject* default_value = Py_None;
770 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
771 return NULL;
772
773 if (checkpath(tag))
774 return PyObject_CallMethod(
775 elementpath_obj, "findtext", "OOO", self, tag, default_value
776 );
777
778 if (!self->extra) {
779 Py_INCREF(default_value);
780 return default_value;
781 }
782
783 for (i = 0; i < self->extra->length; i++) {
784 ElementObject* item = (ElementObject*) self->extra->children[i];
785 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
786 PyObject* text = element_get_text(item);
787 if (text == Py_None)
788 return PyString_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000789 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000790 return text;
791 }
792 }
793
794 Py_INCREF(default_value);
795 return default_value;
796}
797
798static PyObject*
799element_findall(ElementObject* self, PyObject* args)
800{
801 int i;
802 PyObject* out;
803
804 PyObject* tag;
805 if (!PyArg_ParseTuple(args, "O:findall", &tag))
806 return NULL;
807
808 if (checkpath(tag))
809 return PyObject_CallMethod(
810 elementpath_obj, "findall", "OO", self, tag
811 );
812
813 out = PyList_New(0);
814 if (!out)
815 return NULL;
816
817 if (!self->extra)
818 return out;
819
820 for (i = 0; i < self->extra->length; i++) {
821 PyObject* item = self->extra->children[i];
822 if (Element_CheckExact(item) &&
823 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
824 if (PyList_Append(out, item) < 0) {
825 Py_DECREF(out);
826 return NULL;
827 }
828 }
829 }
830
831 return out;
832}
833
834static PyObject*
835element_get(ElementObject* self, PyObject* args)
836{
837 PyObject* value;
838
839 PyObject* key;
840 PyObject* default_value = Py_None;
841 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
842 return NULL;
843
844 if (!self->extra || self->extra->attrib == Py_None)
845 value = default_value;
846 else {
847 value = PyDict_GetItem(self->extra->attrib, key);
848 if (!value)
849 value = default_value;
850 }
851
852 Py_INCREF(value);
853 return value;
854}
855
856static PyObject*
857element_getchildren(ElementObject* self, PyObject* args)
858{
859 int i;
860 PyObject* list;
861
862 if (!PyArg_ParseTuple(args, ":getchildren"))
863 return NULL;
864
865 if (!self->extra)
866 return PyList_New(0);
867
868 list = PyList_New(self->extra->length);
869 if (!list)
870 return NULL;
871
872 for (i = 0; i < self->extra->length; i++) {
873 PyObject* item = self->extra->children[i];
874 Py_INCREF(item);
875 PyList_SET_ITEM(list, i, item);
876 }
877
878 return list;
879}
880
881static PyObject*
882element_getiterator(ElementObject* self, PyObject* args)
883{
884 PyObject* result;
885
886 PyObject* tag = Py_None;
887 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
888 return NULL;
889
890 if (!elementtree_getiterator_obj) {
891 PyErr_SetString(
892 PyExc_RuntimeError,
893 "getiterator helper not found"
894 );
895 return NULL;
896 }
897
898 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000899 if (!args)
900 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000901
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000902 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
903 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
904
905 result = PyObject_CallObject(elementtree_getiterator_obj, args);
906
907 Py_DECREF(args);
908
909 return result;
910}
911
912static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000913element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000914{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000915 ElementObject* self = (ElementObject*) self_;
916
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000917 if (!self->extra || index < 0 || index >= self->extra->length) {
918 PyErr_SetString(
919 PyExc_IndexError,
920 "child index out of range"
921 );
922 return NULL;
923 }
924
925 Py_INCREF(self->extra->children[index]);
926 return self->extra->children[index];
927}
928
929static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000930element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000931{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000932 ElementObject* self = (ElementObject*) self_;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000933 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000934 PyObject* list;
935
936 if (!self->extra)
937 return PyList_New(0);
938
939 /* standard clamping */
940 if (start < 0)
941 start = 0;
942 if (end < 0)
943 end = 0;
944 if (end > self->extra->length)
945 end = self->extra->length;
946 if (start > end)
947 start = end;
948
949 list = PyList_New(end - start);
950 if (!list)
951 return NULL;
952
953 for (i = start; i < end; i++) {
954 PyObject* item = self->extra->children[i];
955 Py_INCREF(item);
956 PyList_SET_ITEM(list, i - start, item);
957 }
958
959 return list;
960}
961
962static PyObject*
963element_insert(ElementObject* self, PyObject* args)
964{
965 int i;
966
967 int index;
968 PyObject* element;
969 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
970 &Element_Type, &element))
971 return NULL;
972
973 if (!self->extra)
974 element_new_extra(self, NULL);
975
976 if (index < 0)
977 index = 0;
978 if (index > self->extra->length)
979 index = self->extra->length;
980
981 if (element_resize(self, 1) < 0)
982 return NULL;
983
984 for (i = self->extra->length; i > index; i--)
985 self->extra->children[i] = self->extra->children[i-1];
986
987 Py_INCREF(element);
988 self->extra->children[index] = element;
989
990 self->extra->length++;
991
992 Py_RETURN_NONE;
993}
994
995static PyObject*
996element_items(ElementObject* self, PyObject* args)
997{
998 if (!PyArg_ParseTuple(args, ":items"))
999 return NULL;
1000
1001 if (!self->extra || self->extra->attrib == Py_None)
1002 return PyList_New(0);
1003
1004 return PyDict_Items(self->extra->attrib);
1005}
1006
1007static PyObject*
1008element_keys(ElementObject* self, PyObject* args)
1009{
1010 if (!PyArg_ParseTuple(args, ":keys"))
1011 return NULL;
1012
1013 if (!self->extra || self->extra->attrib == Py_None)
1014 return PyList_New(0);
1015
1016 return PyDict_Keys(self->extra->attrib);
1017}
1018
Martin v. Löwis18e16552006-02-15 17:27:45 +00001019static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001020element_length(ElementObject* self)
1021{
1022 if (!self->extra)
1023 return 0;
1024
1025 return self->extra->length;
1026}
1027
1028static PyObject*
1029element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1030{
1031 PyObject* elem;
1032
1033 PyObject* tag;
1034 PyObject* attrib;
1035 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1036 return NULL;
1037
1038 attrib = PyDict_Copy(attrib);
1039 if (!attrib)
1040 return NULL;
1041
1042 elem = element_new(tag, attrib);
1043
1044 Py_DECREF(attrib);
1045
1046 return elem;
1047}
1048
1049static PyObject*
1050element_reduce(ElementObject* self, PyObject* args)
1051{
1052 if (!PyArg_ParseTuple(args, ":__reduce__"))
1053 return NULL;
1054
1055 /* Hack alert: This method is used to work around a __copy__
1056 problem on certain 2.3 and 2.4 versions. To save time and
1057 simplify the code, we create the copy in here, and use a dummy
1058 copyelement helper to trick the copy module into doing the
1059 right thing. */
1060
1061 if (!elementtree_copyelement_obj) {
1062 PyErr_SetString(
1063 PyExc_RuntimeError,
1064 "copyelement helper not found"
1065 );
1066 return NULL;
1067 }
1068
1069 return Py_BuildValue(
1070 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1071 );
1072}
1073
1074static PyObject*
1075element_remove(ElementObject* self, PyObject* args)
1076{
1077 int i;
1078
1079 PyObject* element;
1080 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1081 return NULL;
1082
1083 if (!self->extra) {
1084 /* element has no children, so raise exception */
1085 PyErr_SetString(
1086 PyExc_ValueError,
1087 "list.remove(x): x not in list"
1088 );
1089 return NULL;
1090 }
1091
1092 for (i = 0; i < self->extra->length; i++) {
1093 if (self->extra->children[i] == element)
1094 break;
1095 if (PyObject_Compare(self->extra->children[i], element) == 0)
1096 break;
1097 }
1098
1099 if (i == self->extra->length) {
1100 /* element is not in children, so raise exception */
1101 PyErr_SetString(
1102 PyExc_ValueError,
1103 "list.remove(x): x not in list"
1104 );
1105 return NULL;
1106 }
1107
1108 Py_DECREF(self->extra->children[i]);
1109
1110 self->extra->length--;
1111
1112 for (; i < self->extra->length; i++)
1113 self->extra->children[i] = self->extra->children[i+1];
1114
1115 Py_RETURN_NONE;
1116}
1117
1118static PyObject*
1119element_repr(ElementObject* self)
1120{
Walter Dörwald7569dfe2007-05-19 21:49:49 +00001121 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001122}
1123
1124static PyObject*
1125element_set(ElementObject* self, PyObject* args)
1126{
1127 PyObject* attrib;
1128
1129 PyObject* key;
1130 PyObject* value;
1131 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1132 return NULL;
1133
1134 if (!self->extra)
1135 element_new_extra(self, NULL);
1136
1137 attrib = element_get_attrib(self);
1138 if (!attrib)
1139 return NULL;
1140
1141 if (PyDict_SetItem(attrib, key, value) < 0)
1142 return NULL;
1143
1144 Py_RETURN_NONE;
1145}
1146
1147static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001148element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001149{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001150 ElementObject* self = (ElementObject*) self_;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001151 Py_ssize_t i, new, old;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001152 PyObject* recycle = NULL;
1153
1154 if (!self->extra)
1155 element_new_extra(self, NULL);
1156
1157 /* standard clamping */
1158 if (start < 0)
1159 start = 0;
1160 if (end < 0)
1161 end = 0;
1162 if (end > self->extra->length)
1163 end = self->extra->length;
1164 if (start > end)
1165 start = end;
1166
1167 old = end - start;
1168
1169 if (item == NULL)
1170 new = 0;
1171 else if (PyList_CheckExact(item)) {
1172 new = PyList_GET_SIZE(item);
1173 } else {
1174 /* FIXME: support arbitrary sequences? */
1175 PyErr_Format(
1176 PyExc_TypeError,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001177 "expected list, not \"%.200s\"", Py_Type(item)->tp_name
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178 );
1179 return -1;
1180 }
1181
1182 if (old > 0) {
1183 /* to avoid recursive calls to this method (via decref), move
1184 old items to the recycle bin here, and get rid of them when
1185 we're done modifying the element */
1186 recycle = PyList_New(old);
1187 for (i = 0; i < old; i++)
1188 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1189 }
1190
1191 if (new < old) {
1192 /* delete slice */
1193 for (i = end; i < self->extra->length; i++)
1194 self->extra->children[i + new - old] = self->extra->children[i];
1195 } else if (new > old) {
1196 /* insert slice */
1197 if (element_resize(self, new - old) < 0)
1198 return -1;
1199 for (i = self->extra->length-1; i >= end; i--)
1200 self->extra->children[i + new - old] = self->extra->children[i];
1201 }
1202
1203 /* replace the slice */
1204 for (i = 0; i < new; i++) {
1205 PyObject* element = PyList_GET_ITEM(item, i);
1206 Py_INCREF(element);
1207 self->extra->children[i + start] = element;
1208 }
1209
1210 self->extra->length += new - old;
1211
1212 /* discard the recycle bin, and everything in it */
1213 Py_XDECREF(recycle);
1214
1215 return 0;
1216}
1217
1218static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001219element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001220{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001221 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001222 int i;
1223 PyObject* old;
1224
1225 if (!self->extra || index < 0 || index >= self->extra->length) {
1226 PyErr_SetString(
1227 PyExc_IndexError,
1228 "child assignment index out of range");
1229 return -1;
1230 }
1231
1232 old = self->extra->children[index];
1233
1234 if (item) {
1235 Py_INCREF(item);
1236 self->extra->children[index] = item;
1237 } else {
1238 self->extra->length--;
1239 for (i = index; i < self->extra->length; i++)
1240 self->extra->children[i] = self->extra->children[i+1];
1241 }
1242
1243 Py_DECREF(old);
1244
1245 return 0;
1246}
1247
/* Method table for Element objects; element_getattr searches it with
   Py_FindMethod.  All entries use the METH_VARARGS calling convention. */
static PyMethodDef element_methods[] = {

    {"clear", (PyCFunction) element_clear, METH_VARARGS},

    {"get", (PyCFunction) element_get, METH_VARARGS},
    {"set", (PyCFunction) element_set, METH_VARARGS},

    {"find", (PyCFunction) element_find, METH_VARARGS},
    {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
    {"findall", (PyCFunction) element_findall, METH_VARARGS},

    {"append", (PyCFunction) element_append, METH_VARARGS},
    {"insert", (PyCFunction) element_insert, METH_VARARGS},
    {"remove", (PyCFunction) element_remove, METH_VARARGS},

    {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
    {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},

    {"items", (PyCFunction) element_items, METH_VARARGS},
    {"keys", (PyCFunction) element_keys, METH_VARARGS},

    {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},

    {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
    {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},

    /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
       C objects correctly, so we have to fake it using a __reduce__-
       based hack (see the element_reduce implementation above for
       details). */

    /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
       using a runtime test to figure out if we need to fake things
       or not (see the init code below).  The following entry is
       enabled only if the hack is needed; the leading "!" keeps the
       name from matching a normal attribute lookup until then. */

    {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},

    {NULL, NULL} /* sentinel */
};
1288
1289static PyObject*
1290element_getattr(ElementObject* self, char* name)
1291{
1292 PyObject* res;
1293
1294 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1295 if (res)
1296 return res;
1297
1298 PyErr_Clear();
1299
1300 if (strcmp(name, "tag") == 0)
1301 res = self->tag;
1302 else if (strcmp(name, "text") == 0)
1303 res = element_get_text(self);
1304 else if (strcmp(name, "tail") == 0) {
1305 res = element_get_tail(self);
1306 } else if (strcmp(name, "attrib") == 0) {
1307 if (!self->extra)
1308 element_new_extra(self, NULL);
1309 res = element_get_attrib(self);
1310 } else {
1311 PyErr_SetString(PyExc_AttributeError, name);
1312 return NULL;
1313 }
1314
1315 if (!res)
1316 return NULL;
1317
1318 Py_INCREF(res);
1319 return res;
1320}
1321
1322static int
1323element_setattr(ElementObject* self, const char* name, PyObject* value)
1324{
1325 if (value == NULL) {
1326 PyErr_SetString(
1327 PyExc_AttributeError,
1328 "can't delete element attributes"
1329 );
1330 return -1;
1331 }
1332
1333 if (strcmp(name, "tag") == 0) {
1334 Py_DECREF(self->tag);
1335 self->tag = value;
1336 Py_INCREF(self->tag);
1337 } else if (strcmp(name, "text") == 0) {
1338 Py_DECREF(JOIN_OBJ(self->text));
1339 self->text = value;
1340 Py_INCREF(self->text);
1341 } else if (strcmp(name, "tail") == 0) {
1342 Py_DECREF(JOIN_OBJ(self->tail));
1343 self->tail = value;
1344 Py_INCREF(self->tail);
1345 } else if (strcmp(name, "attrib") == 0) {
1346 if (!self->extra)
1347 element_new_extra(self, NULL);
1348 Py_DECREF(self->extra->attrib);
1349 self->extra->attrib = value;
1350 Py_INCREF(self->extra->attrib);
1351 } else {
1352 PyErr_SetString(PyExc_AttributeError, name);
1353 return -1;
1354 }
1355
1356 return 0;
1357}
1358
/* Sequence protocol for Element objects: an element behaves like a
   list of its children. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem, /* sq_item */
    element_getslice, /* sq_slice */
    element_setitem, /* sq_ass_item */
    element_setslice, /* sq_ass_slice */
};
1368
/* Type object for Element.  Only the slots listed here are filled in;
   the remaining ones are left zero-initialized. */
static PyTypeObject Element_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "Element", sizeof(ElementObject), 0,
    /* methods */
    (destructor)element_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)element_getattr, /* tp_getattr */
    (setattrfunc)element_setattr, /* tp_setattr */
    0, /* tp_compare */
    (reprfunc)element_repr, /* tp_repr */
    0, /* tp_as_number */
    &element_as_sequence, /* tp_as_sequence */
};
1382
1383/* ==================================================================== */
1384/* the tree builder type */
1385
/* State of a tree builder: the tree under construction, the stack of
   open elements, pending character data, and optional event tracing. */
typedef struct {
    PyObject_HEAD

    PyObject* root; /* root node (first created node) */

    ElementObject* this; /* current node */
    ElementObject* last; /* most recently created node */

    PyObject* data; /* data collector (string or list), or NULL */

    PyObject* stack; /* element stack */
    Py_ssize_t index; /* current stack size (0=empty) */

    /* element tracing */
    PyObject* events; /* list of events, or NULL if not collecting */
    PyObject* start_event_obj; /* event objects (NULL to ignore) */
    PyObject* end_event_obj;
    PyObject* start_ns_event_obj;
    PyObject* end_ns_event_obj;

} TreeBuilderObject;

static PyTypeObject TreeBuilder_Type;

/* true only for exact TreeBuilder instances */
#define TreeBuilder_CheckExact(op) (Py_Type(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001411
1412/* -------------------------------------------------------------------- */
1413/* constructor and destructor */
1414
1415LOCAL(PyObject*)
1416treebuilder_new(void)
1417{
1418 TreeBuilderObject* self;
1419
1420 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1421 if (self == NULL)
1422 return NULL;
1423
1424 self->root = NULL;
1425
1426 Py_INCREF(Py_None);
1427 self->this = (ElementObject*) Py_None;
1428
1429 Py_INCREF(Py_None);
1430 self->last = (ElementObject*) Py_None;
1431
1432 self->data = NULL;
1433
1434 self->stack = PyList_New(20);
1435 self->index = 0;
1436
1437 self->events = NULL;
1438 self->start_event_obj = self->end_event_obj = NULL;
1439 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1440
1441 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1442
1443 return (PyObject*) self;
1444}
1445
1446static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001447treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001448{
1449 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1450 return NULL;
1451
1452 return treebuilder_new();
1453}
1454
/* Destructor: drop every reference held by the builder, then free the
   object itself. */
static void
treebuilder_dealloc(TreeBuilderObject* self)
{
    /* event tracing objects are optional (may be NULL) */
    Py_XDECREF(self->end_ns_event_obj);
    Py_XDECREF(self->start_ns_event_obj);
    Py_XDECREF(self->end_event_obj);
    Py_XDECREF(self->start_event_obj);
    Py_XDECREF(self->events);
    /* stack, last and this are released unconditionally, so they are
       expected to be non-NULL here */
    Py_DECREF(self->stack);
    Py_XDECREF(self->data);
    Py_DECREF(self->last);
    Py_DECREF(self->this);
    Py_XDECREF(self->root);

    RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");

    PyObject_Del(self);
}
1473
1474/* -------------------------------------------------------------------- */
1475/* handlers */
1476
1477LOCAL(PyObject*)
1478treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1479 PyObject* standalone)
1480{
1481 Py_RETURN_NONE;
1482}
1483
1484LOCAL(PyObject*)
1485treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1486 PyObject* attrib)
1487{
1488 PyObject* node;
1489 PyObject* this;
1490
1491 if (self->data) {
1492 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001493 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001494 self->last->text = JOIN_SET(
1495 self->data, PyList_CheckExact(self->data)
1496 );
1497 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001498 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001499 self->last->tail = JOIN_SET(
1500 self->data, PyList_CheckExact(self->data)
1501 );
1502 }
1503 self->data = NULL;
1504 }
1505
1506 node = element_new(tag, attrib);
1507 if (!node)
1508 return NULL;
1509
1510 this = (PyObject*) self->this;
1511
1512 if (this != Py_None) {
1513 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001514 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001515 } else {
1516 if (self->root) {
1517 PyErr_SetString(
1518 PyExc_SyntaxError,
1519 "multiple elements on top level"
1520 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001521 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001522 }
1523 Py_INCREF(node);
1524 self->root = node;
1525 }
1526
1527 if (self->index < PyList_GET_SIZE(self->stack)) {
1528 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001529 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001530 Py_INCREF(this);
1531 } else {
1532 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001533 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001534 }
1535 self->index++;
1536
1537 Py_DECREF(this);
1538 Py_INCREF(node);
1539 self->this = (ElementObject*) node;
1540
1541 Py_DECREF(self->last);
1542 Py_INCREF(node);
1543 self->last = (ElementObject*) node;
1544
1545 if (self->start_event_obj) {
1546 PyObject* res;
1547 PyObject* action = self->start_event_obj;
1548 res = PyTuple_New(2);
1549 if (res) {
1550 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1551 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1552 PyList_Append(self->events, res);
1553 Py_DECREF(res);
1554 } else
1555 PyErr_Clear(); /* FIXME: propagate error */
1556 }
1557
1558 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001559
1560 error:
1561 Py_DECREF(node);
1562 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001563}
1564
1565LOCAL(PyObject*)
1566treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1567{
1568 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001569 if (self->last == (ElementObject*) Py_None) {
1570 /* ignore calls to data before the first call to start */
1571 Py_RETURN_NONE;
1572 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001573 /* store the first item as is */
1574 Py_INCREF(data); self->data = data;
1575 } else {
1576 /* more than one item; use a list to collect items */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001577 if (PyString_CheckExact(self->data) && Py_Refcnt(self->data) == 1 &&
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001578 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1579 /* expat often generates single character data sections; handle
1580 the most common case by resizing the existing string... */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001581 Py_ssize_t size = PyString_GET_SIZE(self->data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001582 if (_PyString_Resize(&self->data, size + 1) < 0)
1583 return NULL;
1584 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1585 } else if (PyList_CheckExact(self->data)) {
1586 if (PyList_Append(self->data, data) < 0)
1587 return NULL;
1588 } else {
1589 PyObject* list = PyList_New(2);
1590 if (!list)
1591 return NULL;
1592 PyList_SET_ITEM(list, 0, self->data);
1593 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1594 self->data = list;
1595 }
1596 }
1597
1598 Py_RETURN_NONE;
1599}
1600
1601LOCAL(PyObject*)
1602treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1603{
1604 PyObject* item;
1605
1606 if (self->data) {
1607 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001608 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001609 self->last->text = JOIN_SET(
1610 self->data, PyList_CheckExact(self->data)
1611 );
1612 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001613 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001614 self->last->tail = JOIN_SET(
1615 self->data, PyList_CheckExact(self->data)
1616 );
1617 }
1618 self->data = NULL;
1619 }
1620
1621 if (self->index == 0) {
1622 PyErr_SetString(
1623 PyExc_IndexError,
1624 "pop from empty stack"
1625 );
1626 return NULL;
1627 }
1628
1629 self->index--;
1630
1631 item = PyList_GET_ITEM(self->stack, self->index);
1632 Py_INCREF(item);
1633
1634 Py_DECREF(self->last);
1635
1636 self->last = (ElementObject*) self->this;
1637 self->this = (ElementObject*) item;
1638
1639 if (self->end_event_obj) {
1640 PyObject* res;
1641 PyObject* action = self->end_event_obj;
1642 PyObject* node = (PyObject*) self->last;
1643 res = PyTuple_New(2);
1644 if (res) {
1645 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1646 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1647 PyList_Append(self->events, res);
1648 Py_DECREF(res);
1649 } else
1650 PyErr_Clear(); /* FIXME: propagate error */
1651 }
1652
1653 Py_INCREF(self->last);
1654 return (PyObject*) self->last;
1655}
1656
1657LOCAL(void)
1658treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1659 const char* prefix, const char *uri)
1660{
1661 PyObject* res;
1662 PyObject* action;
1663 PyObject* parcel;
1664
1665 if (!self->events)
1666 return;
1667
1668 if (start) {
1669 if (!self->start_ns_event_obj)
1670 return;
1671 action = self->start_ns_event_obj;
1672 /* FIXME: prefix and uri use utf-8 encoding! */
1673 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1674 if (!parcel)
1675 return;
1676 Py_INCREF(action);
1677 } else {
1678 if (!self->end_ns_event_obj)
1679 return;
1680 action = self->end_ns_event_obj;
1681 Py_INCREF(action);
1682 parcel = Py_None;
1683 Py_INCREF(parcel);
1684 }
1685
1686 res = PyTuple_New(2);
1687
1688 if (res) {
1689 PyTuple_SET_ITEM(res, 0, action);
1690 PyTuple_SET_ITEM(res, 1, parcel);
1691 PyList_Append(self->events, res);
1692 Py_DECREF(res);
1693 } else
1694 PyErr_Clear(); /* FIXME: propagate error */
1695}
1696
1697/* -------------------------------------------------------------------- */
1698/* methods (in alphabetical order) */
1699
1700static PyObject*
1701treebuilder_data(TreeBuilderObject* self, PyObject* args)
1702{
1703 PyObject* data;
1704 if (!PyArg_ParseTuple(args, "O:data", &data))
1705 return NULL;
1706
1707 return treebuilder_handle_data(self, data);
1708}
1709
1710static PyObject*
1711treebuilder_end(TreeBuilderObject* self, PyObject* args)
1712{
1713 PyObject* tag;
1714 if (!PyArg_ParseTuple(args, "O:end", &tag))
1715 return NULL;
1716
1717 return treebuilder_handle_end(self, tag);
1718}
1719
1720LOCAL(PyObject*)
1721treebuilder_done(TreeBuilderObject* self)
1722{
1723 PyObject* res;
1724
1725 /* FIXME: check stack size? */
1726
1727 if (self->root)
1728 res = self->root;
1729 else
1730 res = Py_None;
1731
1732 Py_INCREF(res);
1733 return res;
1734}
1735
1736static PyObject*
1737treebuilder_close(TreeBuilderObject* self, PyObject* args)
1738{
1739 if (!PyArg_ParseTuple(args, ":close"))
1740 return NULL;
1741
1742 return treebuilder_done(self);
1743}
1744
1745static PyObject*
1746treebuilder_start(TreeBuilderObject* self, PyObject* args)
1747{
1748 PyObject* tag;
1749 PyObject* attrib = Py_None;
1750 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1751 return NULL;
1752
1753 return treebuilder_handle_start(self, tag, attrib);
1754}
1755
1756static PyObject*
1757treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1758{
1759 PyObject* encoding;
1760 PyObject* standalone;
1761 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1762 return NULL;
1763
1764 return treebuilder_handle_xml(self, encoding, standalone);
1765}
1766
/* Method table for TreeBuilder objects; treebuilder_getattr searches
   it with Py_FindMethod. */
static PyMethodDef treebuilder_methods[] = {
    {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
    {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
    {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
    {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
    {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
    {NULL, NULL} /* sentinel */
};
1775
1776static PyObject*
1777treebuilder_getattr(TreeBuilderObject* self, char* name)
1778{
1779 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1780}
1781
/* Type object for TreeBuilder.  Only the slots listed here are filled
   in; the remaining ones are left zero-initialized. */
static PyTypeObject TreeBuilder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "TreeBuilder", sizeof(TreeBuilderObject), 0,
    /* methods */
    (destructor)treebuilder_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)treebuilder_getattr, /* tp_getattr */
};
1790
1791/* ==================================================================== */
1792/* the expat interface */
1793
1794#if defined(USE_EXPAT)
1795
1796#include "expat.h"
1797
1798#if defined(USE_PYEXPAT_CAPI)
1799#include "pyexpat.h"
1800static struct PyExpat_CAPI* expat_capi;
1801#define EXPAT(func) (expat_capi->func)
1802#else
1803#define EXPAT(func) (XML_##func)
1804#endif
1805
/* State of an XMLParser object: the expat parser plus the target that
   receives the parse events. */
typedef struct {
    PyObject_HEAD

    XML_Parser parser; /* underlying expat parser */

    PyObject* target; /* object receiving the events */
    PyObject* entity; /* dict consulted for undefined entities */

    PyObject* names; /* dict caching raw name -> universal name */

    /* methods looked up on the target; NULL if the target lacks them */
    PyObject* handle_xml;
    PyObject* handle_start;
    PyObject* handle_data;
    PyObject* handle_end;

    PyObject* handle_comment;
    PyObject* handle_pi;

} XMLParserObject;

static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001827
1828/* helpers */
1829
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001830LOCAL(int)
1831checkstring(const char* string, int size)
1832{
1833 int i;
1834
1835 /* check if an 8-bit string contains UTF-8 characters */
1836 for (i = 0; i < size; i++)
1837 if (string[i] & 0x80)
1838 return 1;
1839
1840 return 0;
1841}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001842
1843LOCAL(PyObject*)
1844makestring(const char* string, int size)
1845{
1846 /* convert a UTF-8 string to either a 7-bit ascii string or a
1847 Unicode string */
1848
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001849 if (checkstring(string, size))
1850 return PyUnicode_DecodeUTF8(string, size, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001851
1852 return PyString_FromStringAndSize(string, size);
1853}
1854
1855LOCAL(PyObject*)
1856makeuniversal(XMLParserObject* self, const char* string)
1857{
1858 /* convert a UTF-8 tag/attribute name from the expat parser
1859 to a universal name string */
1860
1861 int size = strlen(string);
1862 PyObject* key;
1863 PyObject* value;
1864
1865 /* look the 'raw' name up in the names dictionary */
1866 key = PyString_FromStringAndSize(string, size);
1867 if (!key)
1868 return NULL;
1869
1870 value = PyDict_GetItem(self->names, key);
1871
1872 if (value) {
1873 Py_INCREF(value);
1874 } else {
1875 /* new name. convert to universal name, and decode as
1876 necessary */
1877
1878 PyObject* tag;
1879 char* p;
1880 int i;
1881
1882 /* look for namespace separator */
1883 for (i = 0; i < size; i++)
1884 if (string[i] == '}')
1885 break;
1886 if (i != size) {
1887 /* convert to universal name */
1888 tag = PyString_FromStringAndSize(NULL, size+1);
1889 p = PyString_AS_STRING(tag);
1890 p[0] = '{';
1891 memcpy(p+1, string, size);
1892 size++;
1893 } else {
1894 /* plain name; use key as tag */
1895 Py_INCREF(key);
1896 tag = key;
1897 }
1898
1899 /* decode universal name */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001900 /* inline makestring, to avoid duplicating the source string if
1901 it's not an utf-8 string */
1902 p = PyString_AS_STRING(tag);
1903 if (checkstring(p, size)) {
1904 value = PyUnicode_DecodeUTF8(p, size, "strict");
1905 Py_DECREF(tag);
1906 if (!value) {
1907 Py_DECREF(key);
1908 return NULL;
1909 }
1910 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001911 value = tag; /* use tag as is */
1912
1913 /* add to names dictionary */
1914 if (PyDict_SetItem(self->names, key, value) < 0) {
1915 Py_DECREF(key);
1916 Py_DECREF(value);
1917 return NULL;
1918 }
1919 }
1920
1921 Py_DECREF(key);
1922 return value;
1923}
1924
1925/* -------------------------------------------------------------------- */
1926/* handlers */
1927
1928static void
1929expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1930 int data_len)
1931{
1932 PyObject* key;
1933 PyObject* value;
1934 PyObject* res;
1935
1936 if (data_len < 2 || data_in[0] != '&')
1937 return;
1938
1939 key = makestring(data_in + 1, data_len - 2);
1940 if (!key)
1941 return;
1942
1943 value = PyDict_GetItem(self->entity, key);
1944
1945 if (value) {
1946 if (TreeBuilder_CheckExact(self->target))
1947 res = treebuilder_handle_data(
1948 (TreeBuilderObject*) self->target, value
1949 );
1950 else if (self->handle_data)
1951 res = PyObject_CallFunction(self->handle_data, "O", value);
1952 else
1953 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001954 Py_XDECREF(res);
1955 } else {
1956 PyErr_Format(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001957 PyExc_SyntaxError, "undefined entity &%s;: line %ld, column %ld",
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001958 PyString_AS_STRING(key),
1959 EXPAT(GetErrorLineNumber)(self->parser),
1960 EXPAT(GetErrorColumnNumber)(self->parser)
1961 );
1962 }
1963
1964 Py_DECREF(key);
1965}
1966
1967static void
1968expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
1969 const XML_Char **attrib_in)
1970{
1971 PyObject* res;
1972 PyObject* tag;
1973 PyObject* attrib;
1974 int ok;
1975
1976 /* tag name */
1977 tag = makeuniversal(self, tag_in);
1978 if (!tag)
1979 return; /* parser will look for errors */
1980
1981 /* attributes */
1982 if (attrib_in[0]) {
1983 attrib = PyDict_New();
1984 if (!attrib)
1985 return;
1986 while (attrib_in[0] && attrib_in[1]) {
1987 PyObject* key = makeuniversal(self, attrib_in[0]);
1988 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
1989 if (!key || !value) {
1990 Py_XDECREF(value);
1991 Py_XDECREF(key);
1992 Py_DECREF(attrib);
1993 return;
1994 }
1995 ok = PyDict_SetItem(attrib, key, value);
1996 Py_DECREF(value);
1997 Py_DECREF(key);
1998 if (ok < 0) {
1999 Py_DECREF(attrib);
2000 return;
2001 }
2002 attrib_in += 2;
2003 }
2004 } else {
2005 Py_INCREF(Py_None);
2006 attrib = Py_None;
2007 }
2008
2009 if (TreeBuilder_CheckExact(self->target))
2010 /* shortcut */
2011 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2012 tag, attrib);
2013 else if (self->handle_start)
2014 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2015 else
2016 res = NULL;
2017
2018 Py_DECREF(tag);
2019 Py_DECREF(attrib);
2020
2021 Py_XDECREF(res);
2022}
2023
2024static void
2025expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2026 int data_len)
2027{
2028 PyObject* data;
2029 PyObject* res;
2030
2031 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002032 if (!data)
2033 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002034
2035 if (TreeBuilder_CheckExact(self->target))
2036 /* shortcut */
2037 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2038 else if (self->handle_data)
2039 res = PyObject_CallFunction(self->handle_data, "O", data);
2040 else
2041 res = NULL;
2042
2043 Py_DECREF(data);
2044
2045 Py_XDECREF(res);
2046}
2047
2048static void
2049expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2050{
2051 PyObject* tag;
2052 PyObject* res = NULL;
2053
2054 if (TreeBuilder_CheckExact(self->target))
2055 /* shortcut */
2056 /* the standard tree builder doesn't look at the end tag */
2057 res = treebuilder_handle_end(
2058 (TreeBuilderObject*) self->target, Py_None
2059 );
2060 else if (self->handle_end) {
2061 tag = makeuniversal(self, tag_in);
2062 if (tag) {
2063 res = PyObject_CallFunction(self->handle_end, "O", tag);
2064 Py_DECREF(tag);
2065 }
2066 }
2067
2068 Py_XDECREF(res);
2069}
2070
2071static void
2072expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2073 const XML_Char *uri)
2074{
2075 treebuilder_handle_namespace(
2076 (TreeBuilderObject*) self->target, 1, prefix, uri
2077 );
2078}
2079
2080static void
2081expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2082{
2083 treebuilder_handle_namespace(
2084 (TreeBuilderObject*) self->target, 0, NULL, NULL
2085 );
2086}
2087
2088static void
2089expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2090{
2091 PyObject* comment;
2092 PyObject* res;
2093
2094 if (self->handle_comment) {
2095 comment = makestring(comment_in, strlen(comment_in));
2096 if (comment) {
2097 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2098 Py_XDECREF(res);
2099 Py_DECREF(comment);
2100 }
2101 }
2102}
2103
2104static void
2105expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2106 const XML_Char* data_in)
2107{
2108 PyObject* target;
2109 PyObject* data;
2110 PyObject* res;
2111
2112 if (self->handle_pi) {
2113 target = makestring(target_in, strlen(target_in));
2114 data = makestring(data_in, strlen(data_in));
2115 if (target && data) {
2116 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2117 Py_XDECREF(res);
2118 Py_DECREF(data);
2119 Py_DECREF(target);
2120 } else {
2121 Py_XDECREF(data);
2122 Py_XDECREF(target);
2123 }
2124 }
2125}
2126
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002127static int
2128expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2129 XML_Encoding *info)
2130{
2131 PyObject* u;
2132 Py_UNICODE* p;
2133 unsigned char s[256];
2134 int i;
2135
2136 memset(info, 0, sizeof(XML_Encoding));
2137
2138 for (i = 0; i < 256; i++)
2139 s[i] = i;
2140
Fredrik Lundhc3389992005-12-25 11:40:19 +00002141 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002142 if (!u)
2143 return XML_STATUS_ERROR;
2144
2145 if (PyUnicode_GET_SIZE(u) != 256) {
2146 Py_DECREF(u);
2147 return XML_STATUS_ERROR;
2148 }
2149
2150 p = PyUnicode_AS_UNICODE(u);
2151
2152 for (i = 0; i < 256; i++) {
2153 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2154 info->map[i] = p[i];
2155 else
2156 info->map[i] = -1;
2157 }
2158
2159 Py_DECREF(u);
2160
2161 return XML_STATUS_OK;
2162}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002163
2164/* -------------------------------------------------------------------- */
2165/* constructor and destructor */
2166
2167static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002168xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002169{
2170 XMLParserObject* self;
2171 /* FIXME: does this need to be static? */
2172 static XML_Memory_Handling_Suite memory_handler;
2173
2174 PyObject* target = NULL;
2175 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002176 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002177 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2178 &target, &encoding))
2179 return NULL;
2180
2181#if defined(USE_PYEXPAT_CAPI)
2182 if (!expat_capi) {
2183 PyErr_SetString(
2184 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2185 );
2186 return NULL;
2187 }
2188#endif
2189
2190 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2191 if (self == NULL)
2192 return NULL;
2193
2194 self->entity = PyDict_New();
2195 if (!self->entity) {
2196 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002197 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002198 }
2199
2200 self->names = PyDict_New();
2201 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002202 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002203 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002204 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002205 }
2206
2207 memory_handler.malloc_fcn = PyObject_Malloc;
2208 memory_handler.realloc_fcn = PyObject_Realloc;
2209 memory_handler.free_fcn = PyObject_Free;
2210
2211 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2212 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002213 PyObject_Del(self->names);
2214 PyObject_Del(self->entity);
2215 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002216 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002217 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002218 }
2219
2220 /* setup target handlers */
2221 if (!target) {
2222 target = treebuilder_new();
2223 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002224 EXPAT(ParserFree)(self->parser);
2225 PyObject_Del(self->names);
2226 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002227 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002228 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002229 }
2230 } else
2231 Py_INCREF(target);
2232 self->target = target;
2233
2234 self->handle_xml = PyObject_GetAttrString(target, "xml");
2235 self->handle_start = PyObject_GetAttrString(target, "start");
2236 self->handle_data = PyObject_GetAttrString(target, "data");
2237 self->handle_end = PyObject_GetAttrString(target, "end");
2238 self->handle_comment = PyObject_GetAttrString(target, "comment");
2239 self->handle_pi = PyObject_GetAttrString(target, "pi");
2240
2241 PyErr_Clear();
2242
2243 /* configure parser */
2244 EXPAT(SetUserData)(self->parser, self);
2245 EXPAT(SetElementHandler)(
2246 self->parser,
2247 (XML_StartElementHandler) expat_start_handler,
2248 (XML_EndElementHandler) expat_end_handler
2249 );
2250 EXPAT(SetDefaultHandlerExpand)(
2251 self->parser,
2252 (XML_DefaultHandler) expat_default_handler
2253 );
2254 EXPAT(SetCharacterDataHandler)(
2255 self->parser,
2256 (XML_CharacterDataHandler) expat_data_handler
2257 );
2258 if (self->handle_comment)
2259 EXPAT(SetCommentHandler)(
2260 self->parser,
2261 (XML_CommentHandler) expat_comment_handler
2262 );
2263 if (self->handle_pi)
2264 EXPAT(SetProcessingInstructionHandler)(
2265 self->parser,
2266 (XML_ProcessingInstructionHandler) expat_pi_handler
2267 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002268 EXPAT(SetUnknownEncodingHandler)(
2269 self->parser,
2270 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2271 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002272
2273 ALLOC(sizeof(XMLParserObject), "create expatparser");
2274
2275 return (PyObject*) self;
2276}
2277
2278static void
2279xmlparser_dealloc(XMLParserObject* self)
2280{
2281 EXPAT(ParserFree)(self->parser);
2282
2283 Py_XDECREF(self->handle_pi);
2284 Py_XDECREF(self->handle_comment);
2285 Py_XDECREF(self->handle_end);
2286 Py_XDECREF(self->handle_data);
2287 Py_XDECREF(self->handle_start);
2288 Py_XDECREF(self->handle_xml);
2289
2290 Py_DECREF(self->target);
2291 Py_DECREF(self->entity);
2292 Py_DECREF(self->names);
2293
2294 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2295
2296 PyObject_Del(self);
2297}
2298
2299/* -------------------------------------------------------------------- */
2300/* methods (in alphabetical order) */
2301
2302LOCAL(PyObject*)
2303expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2304{
2305 int ok;
2306
2307 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2308
2309 if (PyErr_Occurred())
2310 return NULL;
2311
2312 if (!ok) {
2313 PyErr_Format(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002314 PyExc_SyntaxError, "%s: line %ld, column %ld",
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002315 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2316 EXPAT(GetErrorLineNumber)(self->parser),
2317 EXPAT(GetErrorColumnNumber)(self->parser)
2318 );
2319 return NULL;
2320 }
2321
2322 Py_RETURN_NONE;
2323}
2324
2325static PyObject*
2326xmlparser_close(XMLParserObject* self, PyObject* args)
2327{
2328 /* end feeding data to parser */
2329
2330 PyObject* res;
2331 if (!PyArg_ParseTuple(args, ":close"))
2332 return NULL;
2333
2334 res = expat_parse(self, "", 0, 1);
2335
2336 if (res && TreeBuilder_CheckExact(self->target)) {
2337 Py_DECREF(res);
2338 return treebuilder_done((TreeBuilderObject*) self->target);
2339 }
2340
2341 return res;
2342}
2343
2344static PyObject*
2345xmlparser_feed(XMLParserObject* self, PyObject* args)
2346{
2347 /* feed data to parser */
2348
2349 char* data;
2350 int data_len;
2351 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2352 return NULL;
2353
2354 return expat_parse(self, data, data_len, 0);
2355}
2356
2357static PyObject*
2358xmlparser_parse(XMLParserObject* self, PyObject* args)
2359{
2360 /* (internal) parse until end of input stream */
2361
2362 PyObject* reader;
2363 PyObject* buffer;
2364 PyObject* res;
2365
2366 PyObject* fileobj;
2367 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2368 return NULL;
2369
2370 reader = PyObject_GetAttrString(fileobj, "read");
2371 if (!reader)
2372 return NULL;
2373
2374 /* read from open file object */
2375 for (;;) {
2376
2377 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2378
2379 if (!buffer) {
2380 /* read failed (e.g. due to KeyboardInterrupt) */
2381 Py_DECREF(reader);
2382 return NULL;
2383 }
2384
2385 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2386 Py_DECREF(buffer);
2387 break;
2388 }
2389
2390 res = expat_parse(
2391 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2392 );
2393
2394 Py_DECREF(buffer);
2395
2396 if (!res) {
2397 Py_DECREF(reader);
2398 return NULL;
2399 }
2400 Py_DECREF(res);
2401
2402 }
2403
2404 Py_DECREF(reader);
2405
2406 res = expat_parse(self, "", 0, 1);
2407
2408 if (res && TreeBuilder_CheckExact(self->target)) {
2409 Py_DECREF(res);
2410 return treebuilder_done((TreeBuilderObject*) self->target);
2411 }
2412
2413 return res;
2414}
2415
2416static PyObject*
2417xmlparser_setevents(XMLParserObject* self, PyObject* args)
2418{
2419 /* activate element event reporting */
2420
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002421 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002422 TreeBuilderObject* target;
2423
2424 PyObject* events; /* event collector */
2425 PyObject* event_set = Py_None;
2426 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2427 &event_set))
2428 return NULL;
2429
2430 if (!TreeBuilder_CheckExact(self->target)) {
2431 PyErr_SetString(
2432 PyExc_TypeError,
2433 "event handling only supported for cElementTree.Treebuilder "
2434 "targets"
2435 );
2436 return NULL;
2437 }
2438
2439 target = (TreeBuilderObject*) self->target;
2440
2441 Py_INCREF(events);
2442 Py_XDECREF(target->events);
2443 target->events = events;
2444
2445 /* clear out existing events */
2446 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2447 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2448 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2449 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2450
2451 if (event_set == Py_None) {
2452 /* default is "end" only */
2453 target->end_event_obj = PyString_FromString("end");
2454 Py_RETURN_NONE;
2455 }
2456
2457 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2458 goto error;
2459
2460 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2461 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2462 char* event;
2463 if (!PyString_Check(item))
2464 goto error;
2465 event = PyString_AS_STRING(item);
2466 if (strcmp(event, "start") == 0) {
2467 Py_INCREF(item);
2468 target->start_event_obj = item;
2469 } else if (strcmp(event, "end") == 0) {
2470 Py_INCREF(item);
2471 Py_XDECREF(target->end_event_obj);
2472 target->end_event_obj = item;
2473 } else if (strcmp(event, "start-ns") == 0) {
2474 Py_INCREF(item);
2475 Py_XDECREF(target->start_ns_event_obj);
2476 target->start_ns_event_obj = item;
2477 EXPAT(SetNamespaceDeclHandler)(
2478 self->parser,
2479 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2480 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2481 );
2482 } else if (strcmp(event, "end-ns") == 0) {
2483 Py_INCREF(item);
2484 Py_XDECREF(target->end_ns_event_obj);
2485 target->end_ns_event_obj = item;
2486 EXPAT(SetNamespaceDeclHandler)(
2487 self->parser,
2488 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2489 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2490 );
2491 } else {
2492 PyErr_Format(
2493 PyExc_ValueError,
2494 "unknown event '%s'", event
2495 );
2496 return NULL;
2497 }
2498 }
2499
2500 Py_RETURN_NONE;
2501
2502 error:
2503 PyErr_SetString(
2504 PyExc_TypeError,
2505 "invalid event tuple"
2506 );
2507 return NULL;
2508}
2509
2510static PyMethodDef xmlparser_methods[] = {
2511 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2512 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2513 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2514 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2515 {NULL, NULL}
2516};
2517
2518static PyObject*
2519xmlparser_getattr(XMLParserObject* self, char* name)
2520{
2521 PyObject* res;
2522
2523 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2524 if (res)
2525 return res;
2526
2527 PyErr_Clear();
2528
2529 if (strcmp(name, "entity") == 0)
2530 res = self->entity;
2531 else if (strcmp(name, "target") == 0)
2532 res = self->target;
2533 else if (strcmp(name, "version") == 0) {
2534 char buffer[100];
2535 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2536 XML_MINOR_VERSION, XML_MICRO_VERSION);
2537 return PyString_FromString(buffer);
2538 } else {
2539 PyErr_SetString(PyExc_AttributeError, name);
2540 return NULL;
2541 }
2542
2543 Py_INCREF(res);
2544 return res;
2545}
2546
Neal Norwitz227b5332006-03-22 09:28:35 +00002547static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002548 PyVarObject_HEAD_INIT(NULL, 0)
2549 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002550 /* methods */
2551 (destructor)xmlparser_dealloc, /* tp_dealloc */
2552 0, /* tp_print */
2553 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2554};
2555
2556#endif
2557
2558/* ==================================================================== */
2559/* python module interface */
2560
2561static PyMethodDef _functions[] = {
2562 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2563 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2564 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2565#if defined(USE_EXPAT)
2566 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2567 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2568#endif
2569 {NULL, NULL}
2570};
2571
Neal Norwitzf6657e62006-12-28 04:47:50 +00002572PyMODINIT_FUNC
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002573init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002574{
2575 PyObject* m;
2576 PyObject* g;
2577 char* bootstrap;
2578#if defined(USE_PYEXPAT_CAPI)
2579 struct PyExpat_CAPI* capi;
2580#endif
2581
2582 /* Patch object type */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002583 Py_Type(&Element_Type) = Py_Type(&TreeBuilder_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002584#if defined(USE_EXPAT)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002585 Py_Type(&XMLParser_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002586#endif
2587
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002588 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002589 if (!m)
2590 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002591
2592 /* python glue code */
2593
2594 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002595 if (!g)
2596 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002597
2598 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2599
2600 bootstrap = (
2601
2602#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2603 "from __future__ import generators\n" /* enable yield under 2.2 */
2604#endif
2605
2606 "from copy import copy, deepcopy\n"
2607
2608 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002609 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610 "except ImportError:\n"
2611 " import ElementTree\n"
2612 "ET = ElementTree\n"
2613 "del ElementTree\n"
2614
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002615 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002616
2617 "try:\n" /* check if copy works as is */
2618 " copy(cElementTree.Element('x'))\n"
2619 "except:\n"
2620 " def copyelement(elem):\n"
2621 " return elem\n"
2622
2623 "def Comment(text=None):\n" /* public */
2624 " element = cElementTree.Element(ET.Comment)\n"
2625 " element.text = text\n"
2626 " return element\n"
2627 "cElementTree.Comment = Comment\n"
2628
2629 "class ElementTree(ET.ElementTree):\n" /* public */
2630 " def parse(self, source, parser=None):\n"
2631 " if not hasattr(source, 'read'):\n"
2632 " source = open(source, 'rb')\n"
2633 " if parser is not None:\n"
2634 " while 1:\n"
2635 " data = source.read(65536)\n"
2636 " if not data:\n"
2637 " break\n"
2638 " parser.feed(data)\n"
2639 " self._root = parser.close()\n"
2640 " else:\n"
2641 " parser = cElementTree.XMLParser()\n"
2642 " self._root = parser._parse(source)\n"
2643 " return self._root\n"
2644 "cElementTree.ElementTree = ElementTree\n"
2645
2646 "def getiterator(node, tag=None):\n" /* helper */
2647 " if tag == '*':\n"
2648 " tag = None\n"
2649#if (PY_VERSION_HEX < 0x02020000)
2650 " nodes = []\n" /* 2.1 doesn't have yield */
2651 " if tag is None or node.tag == tag:\n"
2652 " nodes.append(node)\n"
2653 " for node in node:\n"
2654 " nodes.extend(getiterator(node, tag))\n"
2655 " return nodes\n"
2656#else
2657 " if tag is None or node.tag == tag:\n"
2658 " yield node\n"
2659 " for node in node:\n"
2660 " for node in getiterator(node, tag):\n"
2661 " yield node\n"
2662#endif
2663
2664 "def parse(source, parser=None):\n" /* public */
2665 " tree = ElementTree()\n"
2666 " tree.parse(source, parser)\n"
2667 " return tree\n"
2668 "cElementTree.parse = parse\n"
2669
2670#if (PY_VERSION_HEX < 0x02020000)
2671 "if hasattr(ET, 'iterparse'):\n"
2672 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2673#else
2674 "class iterparse(object):\n"
2675 " root = None\n"
2676 " def __init__(self, file, events=None):\n"
2677 " if not hasattr(file, 'read'):\n"
2678 " file = open(file, 'rb')\n"
2679 " self._file = file\n"
2680 " self._events = events\n"
2681 " def __iter__(self):\n"
2682 " events = []\n"
2683 " b = cElementTree.TreeBuilder()\n"
2684 " p = cElementTree.XMLParser(b)\n"
2685 " p._setevents(events, self._events)\n"
2686 " while 1:\n"
2687 " data = self._file.read(16384)\n"
2688 " if not data:\n"
2689 " break\n"
2690 " p.feed(data)\n"
2691 " for event in events:\n"
2692 " yield event\n"
2693 " del events[:]\n"
2694 " root = p.close()\n"
2695 " for event in events:\n"
2696 " yield event\n"
2697 " self.root = root\n"
2698 "cElementTree.iterparse = iterparse\n"
2699#endif
2700
2701 "def PI(target, text=None):\n" /* public */
2702 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2703 " element.text = target\n"
2704 " if text:\n"
2705 " element.text = element.text + ' ' + text\n"
2706 " return element\n"
2707
2708 " elem = cElementTree.Element(ET.PI)\n"
2709 " elem.text = text\n"
2710 " return elem\n"
2711 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2712
2713 "def XML(text):\n" /* public */
2714 " parser = cElementTree.XMLParser()\n"
2715 " parser.feed(text)\n"
2716 " return parser.close()\n"
2717 "cElementTree.XML = cElementTree.fromstring = XML\n"
2718
2719 "def XMLID(text):\n" /* public */
2720 " tree = XML(text)\n"
2721 " ids = {}\n"
2722 " for elem in tree.getiterator():\n"
2723 " id = elem.get('id')\n"
2724 " if id:\n"
2725 " ids[id] = elem\n"
2726 " return tree, ids\n"
2727 "cElementTree.XMLID = XMLID\n"
2728
2729 "cElementTree.dump = ET.dump\n"
2730 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2731 "cElementTree.iselement = ET.iselement\n"
2732 "cElementTree.QName = ET.QName\n"
2733 "cElementTree.tostring = ET.tostring\n"
2734 "cElementTree.VERSION = '" VERSION "'\n"
2735 "cElementTree.__version__ = '" VERSION "'\n"
2736 "cElementTree.XMLParserError = SyntaxError\n"
2737
2738 );
2739
2740 PyRun_String(bootstrap, Py_file_input, g, NULL);
2741
2742 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2743
2744 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2745 if (elementtree_copyelement_obj) {
2746 /* reduce hack needed; enable reduce method */
2747 PyMethodDef* mp;
2748 for (mp = element_methods; mp->ml_name; mp++)
2749 if (mp->ml_meth == (PyCFunction) element_reduce) {
2750 mp->ml_name = "__reduce__";
2751 break;
2752 }
2753 } else
2754 PyErr_Clear();
2755 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2756 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2757
2758#if defined(USE_PYEXPAT_CAPI)
2759 /* link against pyexpat, if possible */
2760 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2761 if (capi &&
2762 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2763 capi->size <= sizeof(*expat_capi) &&
2764 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2765 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2766 capi->MICRO_VERSION == XML_MICRO_VERSION)
2767 expat_capi = capi;
2768 else
2769 expat_capi = NULL;
2770#endif
2771
2772}