/*
 * ElementTree
 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
 *
 * elementtree accelerator
 *
 * History:
 * 1999-06-20 fl  created (as part of sgmlop)
 * 2001-05-29 fl  effdom edition
 * 2001-06-05 fl  backported to unix; fixed bogus free in clear
 * 2001-07-10 fl  added findall helper
 * 2003-02-27 fl  elementtree edition (alpha)
 * 2004-06-03 fl  updates for elementtree 1.2
 * 2005-01-05 fl  added universal name cache, Element/SubElement factories
 * 2005-01-06 fl  moved python helpers into C module; removed 1.5.2 support
 * 2005-01-07 fl  added 2.1 support; work around broken __copy__ in 2.3
 * 2005-01-08 fl  added makeelement method; fixed path support
 * 2005-01-10 fl  optimized memory usage
 * 2005-01-11 fl  first public release (cElementTree 0.8)
 * 2005-01-12 fl  split element object into base and extras
 * 2005-01-13 fl  use tagged pointers for tail/text (cElementTree 0.9)
 * 2005-01-17 fl  added treebuilder close method
 * 2005-01-17 fl  fixed crash in getchildren
 * 2005-01-18 fl  removed observer api, added iterparse (cElementTree 0.9.3)
 * 2005-01-23 fl  revised iterparse api; added namespace event support (0.9.8)
 * 2005-01-26 fl  added VERSION module property (cElementTree 1.0)
 * 2005-01-28 fl  added remove method (1.0.1)
 * 2005-03-01 fl  added iselement function; fixed makeelement aliasing (1.0.2)
 * 2005-03-13 fl  export Comment and ProcessingInstruction/PI helpers
 * 2005-03-26 fl  added Comment and PI support to XMLParser
 * 2005-03-27 fl  event optimizations; complain about bogus events
 * 2005-08-08 fl  fixed read error handling in parse
 * 2005-08-11 fl  added runtime test for copy workaround (1.0.3)
 * 2005-12-13 fl  added expat_capi support (for xml.etree) (1.0.4)
 * 2005-12-16 fl  added support for non-standard encodings
 * 2006-03-08 fl  fixed a couple of potential null-refs and leaks
 * 2006-03-12 fl  merge in 2.5 ssize_t changes
 *
 * Copyright (c) 1999-2006 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2006 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 */

/* Licensed to PSF under a Contributor Agreement. */
/* See http://www.python.org/2.4/license for licensing details. */
48
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000049#include "Python.h"
50
/* version string for this accelerator build */
#define VERSION "1.0.6-snapshot"

/* -------------------------------------------------------------------- */
/* configuration */

/* Leave defined to include the expat-based XMLParser type */
#define USE_EXPAT

/* Define to do all expat calls via pyexpat's embedded expat library */
/* #define USE_PYEXPAT_CAPI */

/* An element can hold this many children without extra memory
   allocations. */
#define STATIC_CHILDREN 4
65
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current version of cElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
75
76/* -------------------------------------------------------------------- */
77
/* Optional allocation tracing.  With the "#if 0" branch enabled, ALLOC
   and RELEASE keep a running byte total in 'memory' and print it with
   the given comment; otherwise both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += (int) (size); printf("%8d - %s\n", memory, (comment)); } while (0)
#define RELEASE(size, comment)\
do { memory -= (int) (size); printf("%8d - %s\n", memory, (comment)); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) declares a file-local helper returning
   'type'; under MSVC it additionally asks for inlining and the fastcall
   convention */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
95
/* compatibility macros */

/* before 2.5: Py_ssize_t is a plain int and lenfunc maps to inquiry */
#if PY_VERSION_HEX < 0x02050000
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

/* before 2.4 / 2.2: fall back from the *_CheckExact tests to *_Check */
#if PY_VERSION_HEX < 0x02040000
#define PyDict_CheckExact PyDict_Check
#if PY_VERSION_HEX < 0x02020000
#define PyList_CheckExact PyList_Check
#define PyString_CheckExact PyString_Check
#if PY_VERSION_HEX >= 0x01060000
#define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
#endif
#endif
#endif

/* supply Py_RETURN_NONE when the headers do not define it */
#ifndef Py_RETURN_NONE
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
116
117/* macros used to store 'join' flags in string object pointers. note
118 that all use of text and tail as object pointers must be wrapped in
119 JOIN_OBJ. see comments in the ElementObject definition for more
120 info. */
121#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
122#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
123#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
124
125/* glue functions (see the init function for details) */
126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
128static PyObject* elementtree_getiterator_obj;
129static PyObject* elementpath_obj;
130
131/* helpers */
132
133LOCAL(PyObject*)
134deepcopy(PyObject* object, PyObject* memo)
135{
136 /* do a deep copy of the given object */
137
138 PyObject* args;
139 PyObject* result;
140
141 if (!elementtree_deepcopy_obj) {
142 PyErr_SetString(
143 PyExc_RuntimeError,
144 "deepcopy helper not found"
145 );
146 return NULL;
147 }
148
149 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000150 if (!args)
151 return NULL;
152
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000153 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
154 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
155
156 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
157
158 Py_DECREF(args);
159
160 return result;
161}
162
163LOCAL(PyObject*)
164list_join(PyObject* list)
165{
166 /* join list elements (destroying the list in the process) */
167
168 PyObject* joiner;
169 PyObject* function;
170 PyObject* args;
171 PyObject* result;
172
173 switch (PyList_GET_SIZE(list)) {
174 case 0:
175 Py_DECREF(list);
176 return PyString_FromString("");
177 case 1:
178 result = PyList_GET_ITEM(list, 0);
179 Py_INCREF(result);
180 Py_DECREF(list);
181 return result;
182 }
183
184 /* two or more elements: slice out a suitable separator from the
185 first member, and use that to join the entire list */
186
187 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
188 if (!joiner)
189 return NULL;
190
191 function = PyObject_GetAttrString(joiner, "join");
192 if (!function) {
193 Py_DECREF(joiner);
194 return NULL;
195 }
196
197 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000198 if (!args)
199 return NULL;
200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000201 PyTuple_SET_ITEM(args, 0, list);
202
203 result = PyObject_CallObject(function, args);
204
205 Py_DECREF(args); /* also removes list */
206 Py_DECREF(function);
207 Py_DECREF(joiner);
208
209 return result;
210}
211
212#if (PY_VERSION_HEX < 0x02020000)
213LOCAL(int)
214PyDict_Update(PyObject* dict, PyObject* other)
215{
216 /* PyDict_Update emulation for 2.1 and earlier */
217
218 PyObject* res;
219
220 res = PyObject_CallMethod(dict, "update", "O", other);
221 if (!res)
222 return -1;
223
224 Py_DECREF(res);
225 return 0;
226}
227#endif
228
229/* -------------------------------------------------------------------- */
230/* the element type */
231
232typedef struct {
233
234 /* attributes (a dictionary object), or None if no attributes */
235 PyObject* attrib;
236
237 /* child elements */
238 int length; /* actual number of items */
239 int allocated; /* allocated items */
240
241 /* this either points to _children or to a malloced buffer */
242 PyObject* *children;
243
244 PyObject* _children[STATIC_CHILDREN];
245
246} ElementObjectExtra;
247
248typedef struct {
249 PyObject_HEAD
250
251 /* element tag (a string). */
252 PyObject* tag;
253
254 /* text before first child. note that this is a tagged pointer;
255 use JOIN_OBJ to get the object pointer. the join flag is used
256 to distinguish lists created by the tree builder from lists
257 assigned to the attribute by application code; the former
258 should be joined before being returned to the user, the latter
259 should be left intact. */
260 PyObject* text;
261
262 /* text after this element, in parent. note that this is a tagged
263 pointer; use JOIN_OBJ to get the object pointer. */
264 PyObject* tail;
265
266 ElementObjectExtra* extra;
267
268} ElementObject;
269
Neal Norwitz227b5332006-03-22 09:28:35 +0000270static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271
272#define Element_CheckExact(op) ((op)->ob_type == &Element_Type)
273
274/* -------------------------------------------------------------------- */
275/* element constructor and destructor */
276
277LOCAL(int)
278element_new_extra(ElementObject* self, PyObject* attrib)
279{
280 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
281 if (!self->extra)
282 return -1;
283
284 if (!attrib)
285 attrib = Py_None;
286
287 Py_INCREF(attrib);
288 self->extra->attrib = attrib;
289
290 self->extra->length = 0;
291 self->extra->allocated = STATIC_CHILDREN;
292 self->extra->children = self->extra->_children;
293
294 return 0;
295}
296
297LOCAL(void)
298element_dealloc_extra(ElementObject* self)
299{
300 int i;
301
302 Py_DECREF(self->extra->attrib);
303
304 for (i = 0; i < self->extra->length; i++)
305 Py_DECREF(self->extra->children[i]);
306
307 if (self->extra->children != self->extra->_children)
308 PyObject_Free(self->extra->children);
309
310 PyObject_Free(self->extra);
311}
312
313LOCAL(PyObject*)
314element_new(PyObject* tag, PyObject* attrib)
315{
316 ElementObject* self;
317
318 self = PyObject_New(ElementObject, &Element_Type);
319 if (self == NULL)
320 return NULL;
321
322 /* use None for empty dictionaries */
323 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
324 attrib = Py_None;
325
326 self->extra = NULL;
327
328 if (attrib != Py_None) {
329
330 if (element_new_extra(self, attrib) < 0)
331 return NULL;
332
333 self->extra->length = 0;
334 self->extra->allocated = STATIC_CHILDREN;
335 self->extra->children = self->extra->_children;
336
337 }
338
339 Py_INCREF(tag);
340 self->tag = tag;
341
342 Py_INCREF(Py_None);
343 self->text = Py_None;
344
345 Py_INCREF(Py_None);
346 self->tail = Py_None;
347
348 ALLOC(sizeof(ElementObject), "create element");
349
350 return (PyObject*) self;
351}
352
353LOCAL(int)
354element_resize(ElementObject* self, int extra)
355{
356 int size;
357 PyObject* *children;
358
359 /* make sure self->children can hold the given number of extra
360 elements. set an exception and return -1 if allocation failed */
361
362 if (!self->extra)
363 element_new_extra(self, NULL);
364
365 size = self->extra->length + extra;
366
367 if (size > self->extra->allocated) {
368 /* use Python 2.4's list growth strategy */
369 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
370 if (self->extra->children != self->extra->_children) {
371 children = PyObject_Realloc(self->extra->children,
372 size * sizeof(PyObject*));
373 if (!children)
374 goto nomemory;
375 } else {
376 children = PyObject_Malloc(size * sizeof(PyObject*));
377 if (!children)
378 goto nomemory;
379 /* copy existing children from static area to malloc buffer */
380 memcpy(children, self->extra->children,
381 self->extra->length * sizeof(PyObject*));
382 }
383 self->extra->children = children;
384 self->extra->allocated = size;
385 }
386
387 return 0;
388
389 nomemory:
390 PyErr_NoMemory();
391 return -1;
392}
393
394LOCAL(int)
395element_add_subelement(ElementObject* self, PyObject* element)
396{
397 /* add a child element to a parent */
398
399 if (element_resize(self, 1) < 0)
400 return -1;
401
402 Py_INCREF(element);
403 self->extra->children[self->extra->length] = element;
404
405 self->extra->length++;
406
407 return 0;
408}
409
410LOCAL(PyObject*)
411element_get_attrib(ElementObject* self)
412{
413 /* return borrowed reference to attrib dictionary */
414 /* note: this function assumes that the extra section exists */
415
416 PyObject* res = self->extra->attrib;
417
418 if (res == Py_None) {
419 /* create missing dictionary */
420 res = PyDict_New();
421 if (!res)
422 return NULL;
423 self->extra->attrib = res;
424 }
425
426 return res;
427}
428
429LOCAL(PyObject*)
430element_get_text(ElementObject* self)
431{
432 /* return borrowed reference to text attribute */
433
434 PyObject* res = self->text;
435
436 if (JOIN_GET(res)) {
437 res = JOIN_OBJ(res);
438 if (PyList_CheckExact(res)) {
439 res = list_join(res);
440 if (!res)
441 return NULL;
442 self->text = res;
443 }
444 }
445
446 return res;
447}
448
449LOCAL(PyObject*)
450element_get_tail(ElementObject* self)
451{
452 /* return borrowed reference to text attribute */
453
454 PyObject* res = self->tail;
455
456 if (JOIN_GET(res)) {
457 res = JOIN_OBJ(res);
458 if (PyList_CheckExact(res)) {
459 res = list_join(res);
460 if (!res)
461 return NULL;
462 self->tail = res;
463 }
464 }
465
466 return res;
467}
468
469static PyObject*
470element(PyObject* self, PyObject* args, PyObject* kw)
471{
472 PyObject* elem;
473
474 PyObject* tag;
475 PyObject* attrib = NULL;
476 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
477 &PyDict_Type, &attrib))
478 return NULL;
479
480 if (attrib || kw) {
481 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
482 if (!attrib)
483 return NULL;
484 if (kw)
485 PyDict_Update(attrib, kw);
486 } else {
487 Py_INCREF(Py_None);
488 attrib = Py_None;
489 }
490
491 elem = element_new(tag, attrib);
492
493 Py_DECREF(attrib);
494
495 return elem;
496}
497
498static PyObject*
499subelement(PyObject* self, PyObject* args, PyObject* kw)
500{
501 PyObject* elem;
502
503 ElementObject* parent;
504 PyObject* tag;
505 PyObject* attrib = NULL;
506 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
507 &Element_Type, &parent, &tag,
508 &PyDict_Type, &attrib))
509 return NULL;
510
511 if (attrib || kw) {
512 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
513 if (!attrib)
514 return NULL;
515 if (kw)
516 PyDict_Update(attrib, kw);
517 } else {
518 Py_INCREF(Py_None);
519 attrib = Py_None;
520 }
521
522 elem = element_new(tag, attrib);
523
524 Py_DECREF(attrib);
525
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000526 if (element_add_subelement(parent, elem) < 0) {
527 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000528 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000529 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000530
531 return elem;
532}
533
534static void
535element_dealloc(ElementObject* self)
536{
537 if (self->extra)
538 element_dealloc_extra(self);
539
540 /* discard attributes */
541 Py_DECREF(self->tag);
542 Py_DECREF(JOIN_OBJ(self->text));
543 Py_DECREF(JOIN_OBJ(self->tail));
544
545 RELEASE(sizeof(ElementObject), "destroy element");
546
547 PyObject_Del(self);
548}
549
550/* -------------------------------------------------------------------- */
551/* methods (in alphabetical order) */
552
553static PyObject*
554element_append(ElementObject* self, PyObject* args)
555{
556 PyObject* element;
557 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
558 return NULL;
559
560 if (element_add_subelement(self, element) < 0)
561 return NULL;
562
563 Py_RETURN_NONE;
564}
565
566static PyObject*
567element_clear(ElementObject* self, PyObject* args)
568{
569 if (!PyArg_ParseTuple(args, ":clear"))
570 return NULL;
571
572 if (self->extra) {
573 element_dealloc_extra(self);
574 self->extra = NULL;
575 }
576
577 Py_INCREF(Py_None);
578 Py_DECREF(JOIN_OBJ(self->text));
579 self->text = Py_None;
580
581 Py_INCREF(Py_None);
582 Py_DECREF(JOIN_OBJ(self->tail));
583 self->tail = Py_None;
584
585 Py_RETURN_NONE;
586}
587
588static PyObject*
589element_copy(ElementObject* self, PyObject* args)
590{
591 int i;
592 ElementObject* element;
593
594 if (!PyArg_ParseTuple(args, ":__copy__"))
595 return NULL;
596
597 element = (ElementObject*) element_new(
598 self->tag, (self->extra) ? self->extra->attrib : Py_None
599 );
600 if (!element)
601 return NULL;
602
603 Py_DECREF(JOIN_OBJ(element->text));
604 element->text = self->text;
605 Py_INCREF(JOIN_OBJ(element->text));
606
607 Py_DECREF(JOIN_OBJ(element->tail));
608 element->tail = self->tail;
609 Py_INCREF(JOIN_OBJ(element->tail));
610
611 if (self->extra) {
612
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000613 if (element_resize(element, self->extra->length) < 0) {
614 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000615 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000616 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000617
618 for (i = 0; i < self->extra->length; i++) {
619 Py_INCREF(self->extra->children[i]);
620 element->extra->children[i] = self->extra->children[i];
621 }
622
623 element->extra->length = self->extra->length;
624
625 }
626
627 return (PyObject*) element;
628}
629
630static PyObject*
631element_deepcopy(ElementObject* self, PyObject* args)
632{
633 int i;
634 ElementObject* element;
635 PyObject* tag;
636 PyObject* attrib;
637 PyObject* text;
638 PyObject* tail;
639 PyObject* id;
640
641 PyObject* memo;
642 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
643 return NULL;
644
645 tag = deepcopy(self->tag, memo);
646 if (!tag)
647 return NULL;
648
649 if (self->extra) {
650 attrib = deepcopy(self->extra->attrib, memo);
651 if (!attrib) {
652 Py_DECREF(tag);
653 return NULL;
654 }
655 } else {
656 Py_INCREF(Py_None);
657 attrib = Py_None;
658 }
659
660 element = (ElementObject*) element_new(tag, attrib);
661
662 Py_DECREF(tag);
663 Py_DECREF(attrib);
664
665 if (!element)
666 return NULL;
667
668 text = deepcopy(JOIN_OBJ(self->text), memo);
669 if (!text)
670 goto error;
671 Py_DECREF(element->text);
672 element->text = JOIN_SET(text, JOIN_GET(self->text));
673
674 tail = deepcopy(JOIN_OBJ(self->tail), memo);
675 if (!tail)
676 goto error;
677 Py_DECREF(element->tail);
678 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
679
680 if (self->extra) {
681
682 if (element_resize(element, self->extra->length) < 0)
683 goto error;
684
685 for (i = 0; i < self->extra->length; i++) {
686 PyObject* child = deepcopy(self->extra->children[i], memo);
687 if (!child) {
688 element->extra->length = i;
689 goto error;
690 }
691 element->extra->children[i] = child;
692 }
693
694 element->extra->length = self->extra->length;
695
696 }
697
698 /* add object to memo dictionary (so deepcopy won't visit it again) */
699 id = PyInt_FromLong((Py_uintptr_t) self);
700
701 i = PyDict_SetItem(memo, id, (PyObject*) element);
702
703 Py_DECREF(id);
704
705 if (i < 0)
706 goto error;
707
708 return (PyObject*) element;
709
710 error:
711 Py_DECREF(element);
712 return NULL;
713}
714
715LOCAL(int)
716checkpath(PyObject* tag)
717{
718 int i, check = 1;
719
720 /* check if a tag contains an xpath character */
721
722#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
723
724#if defined(Py_USING_UNICODE)
725 if (PyUnicode_Check(tag)) {
726 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
727 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
728 if (p[i] == '{')
729 check = 0;
730 else if (p[i] == '}')
731 check = 1;
732 else if (check && PATHCHAR(p[i]))
733 return 1;
734 }
735 return 0;
736 }
737#endif
738 if (PyString_Check(tag)) {
739 char *p = PyString_AS_STRING(tag);
740 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
741 if (p[i] == '{')
742 check = 0;
743 else if (p[i] == '}')
744 check = 1;
745 else if (check && PATHCHAR(p[i]))
746 return 1;
747 }
748 return 0;
749 }
750
751 return 1; /* unknown type; might be path expression */
752}
753
754static PyObject*
755element_find(ElementObject* self, PyObject* args)
756{
757 int i;
758
759 PyObject* tag;
760 if (!PyArg_ParseTuple(args, "O:find", &tag))
761 return NULL;
762
763 if (checkpath(tag))
764 return PyObject_CallMethod(
765 elementpath_obj, "find", "OO", self, tag
766 );
767
768 if (!self->extra)
769 Py_RETURN_NONE;
770
771 for (i = 0; i < self->extra->length; i++) {
772 PyObject* item = self->extra->children[i];
773 if (Element_CheckExact(item) &&
774 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
775 Py_INCREF(item);
776 return item;
777 }
778 }
779
780 Py_RETURN_NONE;
781}
782
783static PyObject*
784element_findtext(ElementObject* self, PyObject* args)
785{
786 int i;
787
788 PyObject* tag;
789 PyObject* default_value = Py_None;
790 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
791 return NULL;
792
793 if (checkpath(tag))
794 return PyObject_CallMethod(
795 elementpath_obj, "findtext", "OOO", self, tag, default_value
796 );
797
798 if (!self->extra) {
799 Py_INCREF(default_value);
800 return default_value;
801 }
802
803 for (i = 0; i < self->extra->length; i++) {
804 ElementObject* item = (ElementObject*) self->extra->children[i];
805 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
806 PyObject* text = element_get_text(item);
807 if (text == Py_None)
808 return PyString_FromString("");
809 Py_INCREF(text);
810 return text;
811 }
812 }
813
814 Py_INCREF(default_value);
815 return default_value;
816}
817
818static PyObject*
819element_findall(ElementObject* self, PyObject* args)
820{
821 int i;
822 PyObject* out;
823
824 PyObject* tag;
825 if (!PyArg_ParseTuple(args, "O:findall", &tag))
826 return NULL;
827
828 if (checkpath(tag))
829 return PyObject_CallMethod(
830 elementpath_obj, "findall", "OO", self, tag
831 );
832
833 out = PyList_New(0);
834 if (!out)
835 return NULL;
836
837 if (!self->extra)
838 return out;
839
840 for (i = 0; i < self->extra->length; i++) {
841 PyObject* item = self->extra->children[i];
842 if (Element_CheckExact(item) &&
843 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
844 if (PyList_Append(out, item) < 0) {
845 Py_DECREF(out);
846 return NULL;
847 }
848 }
849 }
850
851 return out;
852}
853
854static PyObject*
855element_get(ElementObject* self, PyObject* args)
856{
857 PyObject* value;
858
859 PyObject* key;
860 PyObject* default_value = Py_None;
861 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
862 return NULL;
863
864 if (!self->extra || self->extra->attrib == Py_None)
865 value = default_value;
866 else {
867 value = PyDict_GetItem(self->extra->attrib, key);
868 if (!value)
869 value = default_value;
870 }
871
872 Py_INCREF(value);
873 return value;
874}
875
876static PyObject*
877element_getchildren(ElementObject* self, PyObject* args)
878{
879 int i;
880 PyObject* list;
881
882 if (!PyArg_ParseTuple(args, ":getchildren"))
883 return NULL;
884
885 if (!self->extra)
886 return PyList_New(0);
887
888 list = PyList_New(self->extra->length);
889 if (!list)
890 return NULL;
891
892 for (i = 0; i < self->extra->length; i++) {
893 PyObject* item = self->extra->children[i];
894 Py_INCREF(item);
895 PyList_SET_ITEM(list, i, item);
896 }
897
898 return list;
899}
900
901static PyObject*
902element_getiterator(ElementObject* self, PyObject* args)
903{
904 PyObject* result;
905
906 PyObject* tag = Py_None;
907 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
908 return NULL;
909
910 if (!elementtree_getiterator_obj) {
911 PyErr_SetString(
912 PyExc_RuntimeError,
913 "getiterator helper not found"
914 );
915 return NULL;
916 }
917
918 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000919 if (!args)
920 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000921
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000922 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
923 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
924
925 result = PyObject_CallObject(elementtree_getiterator_obj, args);
926
927 Py_DECREF(args);
928
929 return result;
930}
931
932static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000933element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000934{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000935 ElementObject* self = (ElementObject*) self_;
936
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000937 if (!self->extra || index < 0 || index >= self->extra->length) {
938 PyErr_SetString(
939 PyExc_IndexError,
940 "child index out of range"
941 );
942 return NULL;
943 }
944
945 Py_INCREF(self->extra->children[index]);
946 return self->extra->children[index];
947}
948
949static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000950element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000951{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000952 ElementObject* self = (ElementObject*) self_;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000953 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000954 PyObject* list;
955
956 if (!self->extra)
957 return PyList_New(0);
958
959 /* standard clamping */
960 if (start < 0)
961 start = 0;
962 if (end < 0)
963 end = 0;
964 if (end > self->extra->length)
965 end = self->extra->length;
966 if (start > end)
967 start = end;
968
969 list = PyList_New(end - start);
970 if (!list)
971 return NULL;
972
973 for (i = start; i < end; i++) {
974 PyObject* item = self->extra->children[i];
975 Py_INCREF(item);
976 PyList_SET_ITEM(list, i - start, item);
977 }
978
979 return list;
980}
981
982static PyObject*
983element_insert(ElementObject* self, PyObject* args)
984{
985 int i;
986
987 int index;
988 PyObject* element;
989 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
990 &Element_Type, &element))
991 return NULL;
992
993 if (!self->extra)
994 element_new_extra(self, NULL);
995
996 if (index < 0)
997 index = 0;
998 if (index > self->extra->length)
999 index = self->extra->length;
1000
1001 if (element_resize(self, 1) < 0)
1002 return NULL;
1003
1004 for (i = self->extra->length; i > index; i--)
1005 self->extra->children[i] = self->extra->children[i-1];
1006
1007 Py_INCREF(element);
1008 self->extra->children[index] = element;
1009
1010 self->extra->length++;
1011
1012 Py_RETURN_NONE;
1013}
1014
1015static PyObject*
1016element_items(ElementObject* self, PyObject* args)
1017{
1018 if (!PyArg_ParseTuple(args, ":items"))
1019 return NULL;
1020
1021 if (!self->extra || self->extra->attrib == Py_None)
1022 return PyList_New(0);
1023
1024 return PyDict_Items(self->extra->attrib);
1025}
1026
1027static PyObject*
1028element_keys(ElementObject* self, PyObject* args)
1029{
1030 if (!PyArg_ParseTuple(args, ":keys"))
1031 return NULL;
1032
1033 if (!self->extra || self->extra->attrib == Py_None)
1034 return PyList_New(0);
1035
1036 return PyDict_Keys(self->extra->attrib);
1037}
1038
Martin v. Löwis18e16552006-02-15 17:27:45 +00001039static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001040element_length(ElementObject* self)
1041{
1042 if (!self->extra)
1043 return 0;
1044
1045 return self->extra->length;
1046}
1047
1048static PyObject*
1049element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1050{
1051 PyObject* elem;
1052
1053 PyObject* tag;
1054 PyObject* attrib;
1055 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1056 return NULL;
1057
1058 attrib = PyDict_Copy(attrib);
1059 if (!attrib)
1060 return NULL;
1061
1062 elem = element_new(tag, attrib);
1063
1064 Py_DECREF(attrib);
1065
1066 return elem;
1067}
1068
1069static PyObject*
1070element_reduce(ElementObject* self, PyObject* args)
1071{
1072 if (!PyArg_ParseTuple(args, ":__reduce__"))
1073 return NULL;
1074
1075 /* Hack alert: This method is used to work around a __copy__
1076 problem on certain 2.3 and 2.4 versions. To save time and
1077 simplify the code, we create the copy in here, and use a dummy
1078 copyelement helper to trick the copy module into doing the
1079 right thing. */
1080
1081 if (!elementtree_copyelement_obj) {
1082 PyErr_SetString(
1083 PyExc_RuntimeError,
1084 "copyelement helper not found"
1085 );
1086 return NULL;
1087 }
1088
1089 return Py_BuildValue(
1090 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1091 );
1092}
1093
1094static PyObject*
1095element_remove(ElementObject* self, PyObject* args)
1096{
1097 int i;
1098
1099 PyObject* element;
1100 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1101 return NULL;
1102
1103 if (!self->extra) {
1104 /* element has no children, so raise exception */
1105 PyErr_SetString(
1106 PyExc_ValueError,
1107 "list.remove(x): x not in list"
1108 );
1109 return NULL;
1110 }
1111
1112 for (i = 0; i < self->extra->length; i++) {
1113 if (self->extra->children[i] == element)
1114 break;
1115 if (PyObject_Compare(self->extra->children[i], element) == 0)
1116 break;
1117 }
1118
1119 if (i == self->extra->length) {
1120 /* element is not in children, so raise exception */
1121 PyErr_SetString(
1122 PyExc_ValueError,
1123 "list.remove(x): x not in list"
1124 );
1125 return NULL;
1126 }
1127
1128 Py_DECREF(self->extra->children[i]);
1129
1130 self->extra->length--;
1131
1132 for (; i < self->extra->length; i++)
1133 self->extra->children[i] = self->extra->children[i+1];
1134
1135 Py_RETURN_NONE;
1136}
1137
1138static PyObject*
1139element_repr(ElementObject* self)
1140{
1141 PyObject* repr;
1142 char buffer[100];
1143
1144 repr = PyString_FromString("<Element ");
1145
1146 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag));
1147
1148 sprintf(buffer, " at %p>", self);
1149 PyString_ConcatAndDel(&repr, PyString_FromString(buffer));
1150
1151 return repr;
1152}
1153
1154static PyObject*
1155element_set(ElementObject* self, PyObject* args)
1156{
1157 PyObject* attrib;
1158
1159 PyObject* key;
1160 PyObject* value;
1161 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1162 return NULL;
1163
1164 if (!self->extra)
1165 element_new_extra(self, NULL);
1166
1167 attrib = element_get_attrib(self);
1168 if (!attrib)
1169 return NULL;
1170
1171 if (PyDict_SetItem(attrib, key, value) < 0)
1172 return NULL;
1173
1174 Py_RETURN_NONE;
1175}
1176
1177static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001178element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001179{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001180 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001181 int i, new, old;
1182 PyObject* recycle = NULL;
1183
1184 if (!self->extra)
1185 element_new_extra(self, NULL);
1186
1187 /* standard clamping */
1188 if (start < 0)
1189 start = 0;
1190 if (end < 0)
1191 end = 0;
1192 if (end > self->extra->length)
1193 end = self->extra->length;
1194 if (start > end)
1195 start = end;
1196
1197 old = end - start;
1198
1199 if (item == NULL)
1200 new = 0;
1201 else if (PyList_CheckExact(item)) {
1202 new = PyList_GET_SIZE(item);
1203 } else {
1204 /* FIXME: support arbitrary sequences? */
1205 PyErr_Format(
1206 PyExc_TypeError,
1207 "expected list, not \"%.200s\"", item->ob_type->tp_name
1208 );
1209 return -1;
1210 }
1211
1212 if (old > 0) {
1213 /* to avoid recursive calls to this method (via decref), move
1214 old items to the recycle bin here, and get rid of them when
1215 we're done modifying the element */
1216 recycle = PyList_New(old);
1217 for (i = 0; i < old; i++)
1218 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1219 }
1220
1221 if (new < old) {
1222 /* delete slice */
1223 for (i = end; i < self->extra->length; i++)
1224 self->extra->children[i + new - old] = self->extra->children[i];
1225 } else if (new > old) {
1226 /* insert slice */
1227 if (element_resize(self, new - old) < 0)
1228 return -1;
1229 for (i = self->extra->length-1; i >= end; i--)
1230 self->extra->children[i + new - old] = self->extra->children[i];
1231 }
1232
1233 /* replace the slice */
1234 for (i = 0; i < new; i++) {
1235 PyObject* element = PyList_GET_ITEM(item, i);
1236 Py_INCREF(element);
1237 self->extra->children[i + start] = element;
1238 }
1239
1240 self->extra->length += new - old;
1241
1242 /* discard the recycle bin, and everything in it */
1243 Py_XDECREF(recycle);
1244
1245 return 0;
1246}
1247
1248static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001249element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001250{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001251 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001252 int i;
1253 PyObject* old;
1254
1255 if (!self->extra || index < 0 || index >= self->extra->length) {
1256 PyErr_SetString(
1257 PyExc_IndexError,
1258 "child assignment index out of range");
1259 return -1;
1260 }
1261
1262 old = self->extra->children[index];
1263
1264 if (item) {
1265 Py_INCREF(item);
1266 self->extra->children[index] = item;
1267 } else {
1268 self->extra->length--;
1269 for (i = index; i < self->extra->length; i++)
1270 self->extra->children[i] = self->extra->children[i+1];
1271 }
1272
1273 Py_DECREF(old);
1274
1275 return 0;
1276}
1277
1278static PyMethodDef element_methods[] = {
1279
1280 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1281
1282 {"get", (PyCFunction) element_get, METH_VARARGS},
1283 {"set", (PyCFunction) element_set, METH_VARARGS},
1284
1285 {"find", (PyCFunction) element_find, METH_VARARGS},
1286 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1287 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1288
1289 {"append", (PyCFunction) element_append, METH_VARARGS},
1290 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1291 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1292
1293 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1294 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1295
1296 {"items", (PyCFunction) element_items, METH_VARARGS},
1297 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1298
1299 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1300
1301 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1302 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1303
1304 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1305 C objects correctly, so we have to fake it using a __reduce__-
1306 based hack (see the element_reduce implementation above for
1307 details). */
1308
1309 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1310 using a runtime test to figure out if we need to fake things
1311 or now (see the init code below). The following entry is
1312 enabled only if the hack is needed. */
1313
1314 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1315
1316 {NULL, NULL}
1317};
1318
1319static PyObject*
1320element_getattr(ElementObject* self, char* name)
1321{
1322 PyObject* res;
1323
1324 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1325 if (res)
1326 return res;
1327
1328 PyErr_Clear();
1329
1330 if (strcmp(name, "tag") == 0)
1331 res = self->tag;
1332 else if (strcmp(name, "text") == 0)
1333 res = element_get_text(self);
1334 else if (strcmp(name, "tail") == 0) {
1335 res = element_get_tail(self);
1336 } else if (strcmp(name, "attrib") == 0) {
1337 if (!self->extra)
1338 element_new_extra(self, NULL);
1339 res = element_get_attrib(self);
1340 } else {
1341 PyErr_SetString(PyExc_AttributeError, name);
1342 return NULL;
1343 }
1344
1345 if (!res)
1346 return NULL;
1347
1348 Py_INCREF(res);
1349 return res;
1350}
1351
1352static int
1353element_setattr(ElementObject* self, const char* name, PyObject* value)
1354{
1355 if (value == NULL) {
1356 PyErr_SetString(
1357 PyExc_AttributeError,
1358 "can't delete element attributes"
1359 );
1360 return -1;
1361 }
1362
1363 if (strcmp(name, "tag") == 0) {
1364 Py_DECREF(self->tag);
1365 self->tag = value;
1366 Py_INCREF(self->tag);
1367 } else if (strcmp(name, "text") == 0) {
1368 Py_DECREF(JOIN_OBJ(self->text));
1369 self->text = value;
1370 Py_INCREF(self->text);
1371 } else if (strcmp(name, "tail") == 0) {
1372 Py_DECREF(JOIN_OBJ(self->tail));
1373 self->tail = value;
1374 Py_INCREF(self->tail);
1375 } else if (strcmp(name, "attrib") == 0) {
1376 if (!self->extra)
1377 element_new_extra(self, NULL);
1378 Py_DECREF(self->extra->attrib);
1379 self->extra->attrib = value;
1380 Py_INCREF(self->extra->attrib);
1381 } else {
1382 PyErr_SetString(PyExc_AttributeError, name);
1383 return -1;
1384 }
1385
1386 return 0;
1387}
1388
1389static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001390 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001391 0, /* sq_concat */
1392 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001393 element_getitem,
1394 element_getslice,
1395 element_setitem,
1396 element_setslice,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001397};
1398
Neal Norwitz227b5332006-03-22 09:28:35 +00001399static PyTypeObject Element_Type = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001400 PyObject_HEAD_INIT(NULL)
1401 0, "Element", sizeof(ElementObject), 0,
1402 /* methods */
1403 (destructor)element_dealloc, /* tp_dealloc */
1404 0, /* tp_print */
1405 (getattrfunc)element_getattr, /* tp_getattr */
1406 (setattrfunc)element_setattr, /* tp_setattr */
1407 0, /* tp_compare */
1408 (reprfunc)element_repr, /* tp_repr */
1409 0, /* tp_as_number */
1410 &element_as_sequence, /* tp_as_sequence */
1411};
1412
1413/* ==================================================================== */
1414/* the tree builder type */
1415
1416typedef struct {
1417 PyObject_HEAD
1418
1419 PyObject* root; /* root node (first created node) */
1420
1421 ElementObject* this; /* current node */
1422 ElementObject* last; /* most recently created node */
1423
1424 PyObject* data; /* data collector (string or list), or NULL */
1425
1426 PyObject* stack; /* element stack */
1427 int index; /* current stack size (0=empty) */
1428
1429 /* element tracing */
1430 PyObject* events; /* list of events, or NULL if not collecting */
1431 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1432 PyObject* end_event_obj;
1433 PyObject* start_ns_event_obj;
1434 PyObject* end_ns_event_obj;
1435
1436} TreeBuilderObject;
1437
Neal Norwitz227b5332006-03-22 09:28:35 +00001438static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001439
1440#define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type)
1441
1442/* -------------------------------------------------------------------- */
1443/* constructor and destructor */
1444
1445LOCAL(PyObject*)
1446treebuilder_new(void)
1447{
1448 TreeBuilderObject* self;
1449
1450 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1451 if (self == NULL)
1452 return NULL;
1453
1454 self->root = NULL;
1455
1456 Py_INCREF(Py_None);
1457 self->this = (ElementObject*) Py_None;
1458
1459 Py_INCREF(Py_None);
1460 self->last = (ElementObject*) Py_None;
1461
1462 self->data = NULL;
1463
1464 self->stack = PyList_New(20);
1465 self->index = 0;
1466
1467 self->events = NULL;
1468 self->start_event_obj = self->end_event_obj = NULL;
1469 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1470
1471 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1472
1473 return (PyObject*) self;
1474}
1475
1476static PyObject*
1477treebuilder(PyObject* _self, PyObject* args)
1478{
1479 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1480 return NULL;
1481
1482 return treebuilder_new();
1483}
1484
1485static void
1486treebuilder_dealloc(TreeBuilderObject* self)
1487{
1488 Py_XDECREF(self->end_ns_event_obj);
1489 Py_XDECREF(self->start_ns_event_obj);
1490 Py_XDECREF(self->end_event_obj);
1491 Py_XDECREF(self->start_event_obj);
1492 Py_XDECREF(self->events);
1493 Py_DECREF(self->stack);
1494 Py_XDECREF(self->data);
1495 Py_DECREF(self->last);
1496 Py_DECREF(self->this);
1497 Py_XDECREF(self->root);
1498
1499 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1500
1501 PyObject_Del(self);
1502}
1503
1504/* -------------------------------------------------------------------- */
1505/* handlers */
1506
1507LOCAL(PyObject*)
1508treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1509 PyObject* standalone)
1510{
1511 Py_RETURN_NONE;
1512}
1513
1514LOCAL(PyObject*)
1515treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1516 PyObject* attrib)
1517{
1518 PyObject* node;
1519 PyObject* this;
1520
1521 if (self->data) {
1522 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001523 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001524 self->last->text = JOIN_SET(
1525 self->data, PyList_CheckExact(self->data)
1526 );
1527 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001528 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001529 self->last->tail = JOIN_SET(
1530 self->data, PyList_CheckExact(self->data)
1531 );
1532 }
1533 self->data = NULL;
1534 }
1535
1536 node = element_new(tag, attrib);
1537 if (!node)
1538 return NULL;
1539
1540 this = (PyObject*) self->this;
1541
1542 if (this != Py_None) {
1543 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001544 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001545 } else {
1546 if (self->root) {
1547 PyErr_SetString(
1548 PyExc_SyntaxError,
1549 "multiple elements on top level"
1550 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001551 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001552 }
1553 Py_INCREF(node);
1554 self->root = node;
1555 }
1556
1557 if (self->index < PyList_GET_SIZE(self->stack)) {
1558 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001559 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001560 Py_INCREF(this);
1561 } else {
1562 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001563 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001564 }
1565 self->index++;
1566
1567 Py_DECREF(this);
1568 Py_INCREF(node);
1569 self->this = (ElementObject*) node;
1570
1571 Py_DECREF(self->last);
1572 Py_INCREF(node);
1573 self->last = (ElementObject*) node;
1574
1575 if (self->start_event_obj) {
1576 PyObject* res;
1577 PyObject* action = self->start_event_obj;
1578 res = PyTuple_New(2);
1579 if (res) {
1580 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1581 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1582 PyList_Append(self->events, res);
1583 Py_DECREF(res);
1584 } else
1585 PyErr_Clear(); /* FIXME: propagate error */
1586 }
1587
1588 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001589
1590 error:
1591 Py_DECREF(node);
1592 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001593}
1594
1595LOCAL(PyObject*)
1596treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1597{
1598 if (!self->data) {
1599 /* store the first item as is */
1600 Py_INCREF(data); self->data = data;
1601 } else {
1602 /* more than one item; use a list to collect items */
1603 if (PyString_CheckExact(self->data) && self->data->ob_refcnt == 1 &&
1604 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1605 /* expat often generates single character data sections; handle
1606 the most common case by resizing the existing string... */
1607 int size = PyString_GET_SIZE(self->data);
1608 if (_PyString_Resize(&self->data, size + 1) < 0)
1609 return NULL;
1610 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1611 } else if (PyList_CheckExact(self->data)) {
1612 if (PyList_Append(self->data, data) < 0)
1613 return NULL;
1614 } else {
1615 PyObject* list = PyList_New(2);
1616 if (!list)
1617 return NULL;
1618 PyList_SET_ITEM(list, 0, self->data);
1619 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1620 self->data = list;
1621 }
1622 }
1623
1624 Py_RETURN_NONE;
1625}
1626
1627LOCAL(PyObject*)
1628treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1629{
1630 PyObject* item;
1631
1632 if (self->data) {
1633 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001634 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001635 self->last->text = JOIN_SET(
1636 self->data, PyList_CheckExact(self->data)
1637 );
1638 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001639 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001640 self->last->tail = JOIN_SET(
1641 self->data, PyList_CheckExact(self->data)
1642 );
1643 }
1644 self->data = NULL;
1645 }
1646
1647 if (self->index == 0) {
1648 PyErr_SetString(
1649 PyExc_IndexError,
1650 "pop from empty stack"
1651 );
1652 return NULL;
1653 }
1654
1655 self->index--;
1656
1657 item = PyList_GET_ITEM(self->stack, self->index);
1658 Py_INCREF(item);
1659
1660 Py_DECREF(self->last);
1661
1662 self->last = (ElementObject*) self->this;
1663 self->this = (ElementObject*) item;
1664
1665 if (self->end_event_obj) {
1666 PyObject* res;
1667 PyObject* action = self->end_event_obj;
1668 PyObject* node = (PyObject*) self->last;
1669 res = PyTuple_New(2);
1670 if (res) {
1671 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1672 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1673 PyList_Append(self->events, res);
1674 Py_DECREF(res);
1675 } else
1676 PyErr_Clear(); /* FIXME: propagate error */
1677 }
1678
1679 Py_INCREF(self->last);
1680 return (PyObject*) self->last;
1681}
1682
1683LOCAL(void)
1684treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1685 const char* prefix, const char *uri)
1686{
1687 PyObject* res;
1688 PyObject* action;
1689 PyObject* parcel;
1690
1691 if (!self->events)
1692 return;
1693
1694 if (start) {
1695 if (!self->start_ns_event_obj)
1696 return;
1697 action = self->start_ns_event_obj;
1698 /* FIXME: prefix and uri use utf-8 encoding! */
1699 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1700 if (!parcel)
1701 return;
1702 Py_INCREF(action);
1703 } else {
1704 if (!self->end_ns_event_obj)
1705 return;
1706 action = self->end_ns_event_obj;
1707 Py_INCREF(action);
1708 parcel = Py_None;
1709 Py_INCREF(parcel);
1710 }
1711
1712 res = PyTuple_New(2);
1713
1714 if (res) {
1715 PyTuple_SET_ITEM(res, 0, action);
1716 PyTuple_SET_ITEM(res, 1, parcel);
1717 PyList_Append(self->events, res);
1718 Py_DECREF(res);
1719 } else
1720 PyErr_Clear(); /* FIXME: propagate error */
1721}
1722
1723/* -------------------------------------------------------------------- */
1724/* methods (in alphabetical order) */
1725
1726static PyObject*
1727treebuilder_data(TreeBuilderObject* self, PyObject* args)
1728{
1729 PyObject* data;
1730 if (!PyArg_ParseTuple(args, "O:data", &data))
1731 return NULL;
1732
1733 return treebuilder_handle_data(self, data);
1734}
1735
1736static PyObject*
1737treebuilder_end(TreeBuilderObject* self, PyObject* args)
1738{
1739 PyObject* tag;
1740 if (!PyArg_ParseTuple(args, "O:end", &tag))
1741 return NULL;
1742
1743 return treebuilder_handle_end(self, tag);
1744}
1745
1746LOCAL(PyObject*)
1747treebuilder_done(TreeBuilderObject* self)
1748{
1749 PyObject* res;
1750
1751 /* FIXME: check stack size? */
1752
1753 if (self->root)
1754 res = self->root;
1755 else
1756 res = Py_None;
1757
1758 Py_INCREF(res);
1759 return res;
1760}
1761
1762static PyObject*
1763treebuilder_close(TreeBuilderObject* self, PyObject* args)
1764{
1765 if (!PyArg_ParseTuple(args, ":close"))
1766 return NULL;
1767
1768 return treebuilder_done(self);
1769}
1770
1771static PyObject*
1772treebuilder_start(TreeBuilderObject* self, PyObject* args)
1773{
1774 PyObject* tag;
1775 PyObject* attrib = Py_None;
1776 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1777 return NULL;
1778
1779 return treebuilder_handle_start(self, tag, attrib);
1780}
1781
1782static PyObject*
1783treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1784{
1785 PyObject* encoding;
1786 PyObject* standalone;
1787 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1788 return NULL;
1789
1790 return treebuilder_handle_xml(self, encoding, standalone);
1791}
1792
1793static PyMethodDef treebuilder_methods[] = {
1794 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1795 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1796 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1797 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1798 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1799 {NULL, NULL}
1800};
1801
1802static PyObject*
1803treebuilder_getattr(TreeBuilderObject* self, char* name)
1804{
1805 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1806}
1807
Neal Norwitz227b5332006-03-22 09:28:35 +00001808static PyTypeObject TreeBuilder_Type = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001809 PyObject_HEAD_INIT(NULL)
1810 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
1811 /* methods */
1812 (destructor)treebuilder_dealloc, /* tp_dealloc */
1813 0, /* tp_print */
1814 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1815};
1816
1817/* ==================================================================== */
1818/* the expat interface */
1819
1820#if defined(USE_EXPAT)
1821
1822#include "expat.h"
1823
1824#if defined(USE_PYEXPAT_CAPI)
1825#include "pyexpat.h"
1826static struct PyExpat_CAPI* expat_capi;
1827#define EXPAT(func) (expat_capi->func)
1828#else
1829#define EXPAT(func) (XML_##func)
1830#endif
1831
1832typedef struct {
1833 PyObject_HEAD
1834
1835 XML_Parser parser;
1836
1837 PyObject* target;
1838 PyObject* entity;
1839
1840 PyObject* names;
1841
1842 PyObject* handle_xml;
1843 PyObject* handle_start;
1844 PyObject* handle_data;
1845 PyObject* handle_end;
1846
1847 PyObject* handle_comment;
1848 PyObject* handle_pi;
1849
1850} XMLParserObject;
1851
Neal Norwitz227b5332006-03-22 09:28:35 +00001852static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001853
1854/* helpers */
1855
1856#if defined(Py_USING_UNICODE)
/* Return 1 if any of the first `size` bytes of `string` has its high
   bit set (i.e. the 8-bit string contains UTF-8 characters), else 0. */
LOCAL(int)
checkstring(const char* string, int size)
{
    int pos = 0;

    while (pos < size) {
        if (string[pos] & 0x80)
            return 1;
        pos++;
    }

    return 0;
}
1869#endif
1870
1871LOCAL(PyObject*)
1872makestring(const char* string, int size)
1873{
1874 /* convert a UTF-8 string to either a 7-bit ascii string or a
1875 Unicode string */
1876
1877#if defined(Py_USING_UNICODE)
1878 if (checkstring(string, size))
1879 return PyUnicode_DecodeUTF8(string, size, "strict");
1880#endif
1881
1882 return PyString_FromStringAndSize(string, size);
1883}
1884
1885LOCAL(PyObject*)
1886makeuniversal(XMLParserObject* self, const char* string)
1887{
1888 /* convert a UTF-8 tag/attribute name from the expat parser
1889 to a universal name string */
1890
1891 int size = strlen(string);
1892 PyObject* key;
1893 PyObject* value;
1894
1895 /* look the 'raw' name up in the names dictionary */
1896 key = PyString_FromStringAndSize(string, size);
1897 if (!key)
1898 return NULL;
1899
1900 value = PyDict_GetItem(self->names, key);
1901
1902 if (value) {
1903 Py_INCREF(value);
1904 } else {
1905 /* new name. convert to universal name, and decode as
1906 necessary */
1907
1908 PyObject* tag;
1909 char* p;
1910 int i;
1911
1912 /* look for namespace separator */
1913 for (i = 0; i < size; i++)
1914 if (string[i] == '}')
1915 break;
1916 if (i != size) {
1917 /* convert to universal name */
1918 tag = PyString_FromStringAndSize(NULL, size+1);
1919 p = PyString_AS_STRING(tag);
1920 p[0] = '{';
1921 memcpy(p+1, string, size);
1922 size++;
1923 } else {
1924 /* plain name; use key as tag */
1925 Py_INCREF(key);
1926 tag = key;
1927 }
1928
1929 /* decode universal name */
1930#if defined(Py_USING_UNICODE)
1931 /* inline makestring, to avoid duplicating the source string if
1932 it's not an utf-8 string */
1933 p = PyString_AS_STRING(tag);
1934 if (checkstring(p, size)) {
1935 value = PyUnicode_DecodeUTF8(p, size, "strict");
1936 Py_DECREF(tag);
1937 if (!value) {
1938 Py_DECREF(key);
1939 return NULL;
1940 }
1941 } else
1942#endif
1943 value = tag; /* use tag as is */
1944
1945 /* add to names dictionary */
1946 if (PyDict_SetItem(self->names, key, value) < 0) {
1947 Py_DECREF(key);
1948 Py_DECREF(value);
1949 return NULL;
1950 }
1951 }
1952
1953 Py_DECREF(key);
1954 return value;
1955}
1956
1957/* -------------------------------------------------------------------- */
1958/* handlers */
1959
1960static void
1961expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1962 int data_len)
1963{
1964 PyObject* key;
1965 PyObject* value;
1966 PyObject* res;
1967
1968 if (data_len < 2 || data_in[0] != '&')
1969 return;
1970
1971 key = makestring(data_in + 1, data_len - 2);
1972 if (!key)
1973 return;
1974
1975 value = PyDict_GetItem(self->entity, key);
1976
1977 if (value) {
1978 if (TreeBuilder_CheckExact(self->target))
1979 res = treebuilder_handle_data(
1980 (TreeBuilderObject*) self->target, value
1981 );
1982 else if (self->handle_data)
1983 res = PyObject_CallFunction(self->handle_data, "O", value);
1984 else
1985 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001986 Py_XDECREF(res);
1987 } else {
1988 PyErr_Format(
1989 PyExc_SyntaxError, "undefined entity &%s;: line %d, column %d",
1990 PyString_AS_STRING(key),
1991 EXPAT(GetErrorLineNumber)(self->parser),
1992 EXPAT(GetErrorColumnNumber)(self->parser)
1993 );
1994 }
1995
1996 Py_DECREF(key);
1997}
1998
1999static void
2000expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2001 const XML_Char **attrib_in)
2002{
2003 PyObject* res;
2004 PyObject* tag;
2005 PyObject* attrib;
2006 int ok;
2007
2008 /* tag name */
2009 tag = makeuniversal(self, tag_in);
2010 if (!tag)
2011 return; /* parser will look for errors */
2012
2013 /* attributes */
2014 if (attrib_in[0]) {
2015 attrib = PyDict_New();
2016 if (!attrib)
2017 return;
2018 while (attrib_in[0] && attrib_in[1]) {
2019 PyObject* key = makeuniversal(self, attrib_in[0]);
2020 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2021 if (!key || !value) {
2022 Py_XDECREF(value);
2023 Py_XDECREF(key);
2024 Py_DECREF(attrib);
2025 return;
2026 }
2027 ok = PyDict_SetItem(attrib, key, value);
2028 Py_DECREF(value);
2029 Py_DECREF(key);
2030 if (ok < 0) {
2031 Py_DECREF(attrib);
2032 return;
2033 }
2034 attrib_in += 2;
2035 }
2036 } else {
2037 Py_INCREF(Py_None);
2038 attrib = Py_None;
2039 }
2040
2041 if (TreeBuilder_CheckExact(self->target))
2042 /* shortcut */
2043 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2044 tag, attrib);
2045 else if (self->handle_start)
2046 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2047 else
2048 res = NULL;
2049
2050 Py_DECREF(tag);
2051 Py_DECREF(attrib);
2052
2053 Py_XDECREF(res);
2054}
2055
2056static void
2057expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2058 int data_len)
2059{
2060 PyObject* data;
2061 PyObject* res;
2062
2063 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002064 if (!data)
2065 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002066
2067 if (TreeBuilder_CheckExact(self->target))
2068 /* shortcut */
2069 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2070 else if (self->handle_data)
2071 res = PyObject_CallFunction(self->handle_data, "O", data);
2072 else
2073 res = NULL;
2074
2075 Py_DECREF(data);
2076
2077 Py_XDECREF(res);
2078}
2079
2080static void
2081expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2082{
2083 PyObject* tag;
2084 PyObject* res = NULL;
2085
2086 if (TreeBuilder_CheckExact(self->target))
2087 /* shortcut */
2088 /* the standard tree builder doesn't look at the end tag */
2089 res = treebuilder_handle_end(
2090 (TreeBuilderObject*) self->target, Py_None
2091 );
2092 else if (self->handle_end) {
2093 tag = makeuniversal(self, tag_in);
2094 if (tag) {
2095 res = PyObject_CallFunction(self->handle_end, "O", tag);
2096 Py_DECREF(tag);
2097 }
2098 }
2099
2100 Py_XDECREF(res);
2101}
2102
2103static void
2104expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2105 const XML_Char *uri)
2106{
2107 treebuilder_handle_namespace(
2108 (TreeBuilderObject*) self->target, 1, prefix, uri
2109 );
2110}
2111
2112static void
2113expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2114{
2115 treebuilder_handle_namespace(
2116 (TreeBuilderObject*) self->target, 0, NULL, NULL
2117 );
2118}
2119
2120static void
2121expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2122{
2123 PyObject* comment;
2124 PyObject* res;
2125
2126 if (self->handle_comment) {
2127 comment = makestring(comment_in, strlen(comment_in));
2128 if (comment) {
2129 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2130 Py_XDECREF(res);
2131 Py_DECREF(comment);
2132 }
2133 }
2134}
2135
2136static void
2137expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2138 const XML_Char* data_in)
2139{
2140 PyObject* target;
2141 PyObject* data;
2142 PyObject* res;
2143
2144 if (self->handle_pi) {
2145 target = makestring(target_in, strlen(target_in));
2146 data = makestring(data_in, strlen(data_in));
2147 if (target && data) {
2148 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2149 Py_XDECREF(res);
2150 Py_DECREF(data);
2151 Py_DECREF(target);
2152 } else {
2153 Py_XDECREF(data);
2154 Py_XDECREF(target);
2155 }
2156 }
2157}
2158
2159#if defined(Py_USING_UNICODE)
2160static int
2161expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2162 XML_Encoding *info)
2163{
2164 PyObject* u;
2165 Py_UNICODE* p;
2166 unsigned char s[256];
2167 int i;
2168
2169 memset(info, 0, sizeof(XML_Encoding));
2170
2171 for (i = 0; i < 256; i++)
2172 s[i] = i;
2173
Fredrik Lundhc3389992005-12-25 11:40:19 +00002174 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002175 if (!u)
2176 return XML_STATUS_ERROR;
2177
2178 if (PyUnicode_GET_SIZE(u) != 256) {
2179 Py_DECREF(u);
2180 return XML_STATUS_ERROR;
2181 }
2182
2183 p = PyUnicode_AS_UNICODE(u);
2184
2185 for (i = 0; i < 256; i++) {
2186 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2187 info->map[i] = p[i];
2188 else
2189 info->map[i] = -1;
2190 }
2191
2192 Py_DECREF(u);
2193
2194 return XML_STATUS_OK;
2195}
2196#endif
2197
2198/* -------------------------------------------------------------------- */
2199/* constructor and destructor */
2200
2201static PyObject*
2202xmlparser(PyObject* _self, PyObject* args, PyObject* kw)
2203{
2204 XMLParserObject* self;
2205 /* FIXME: does this need to be static? */
2206 static XML_Memory_Handling_Suite memory_handler;
2207
2208 PyObject* target = NULL;
2209 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002210 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002211 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2212 &target, &encoding))
2213 return NULL;
2214
2215#if defined(USE_PYEXPAT_CAPI)
2216 if (!expat_capi) {
2217 PyErr_SetString(
2218 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2219 );
2220 return NULL;
2221 }
2222#endif
2223
2224 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2225 if (self == NULL)
2226 return NULL;
2227
2228 self->entity = PyDict_New();
2229 if (!self->entity) {
2230 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002231 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002232 }
2233
2234 self->names = PyDict_New();
2235 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002236 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002237 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002238 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002239 }
2240
2241 memory_handler.malloc_fcn = PyObject_Malloc;
2242 memory_handler.realloc_fcn = PyObject_Realloc;
2243 memory_handler.free_fcn = PyObject_Free;
2244
2245 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2246 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002247 PyObject_Del(self->names);
2248 PyObject_Del(self->entity);
2249 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002250 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002251 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002252 }
2253
2254 /* setup target handlers */
2255 if (!target) {
2256 target = treebuilder_new();
2257 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002258 EXPAT(ParserFree)(self->parser);
2259 PyObject_Del(self->names);
2260 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002261 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002262 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002263 }
2264 } else
2265 Py_INCREF(target);
2266 self->target = target;
2267
2268 self->handle_xml = PyObject_GetAttrString(target, "xml");
2269 self->handle_start = PyObject_GetAttrString(target, "start");
2270 self->handle_data = PyObject_GetAttrString(target, "data");
2271 self->handle_end = PyObject_GetAttrString(target, "end");
2272 self->handle_comment = PyObject_GetAttrString(target, "comment");
2273 self->handle_pi = PyObject_GetAttrString(target, "pi");
2274
2275 PyErr_Clear();
2276
2277 /* configure parser */
2278 EXPAT(SetUserData)(self->parser, self);
2279 EXPAT(SetElementHandler)(
2280 self->parser,
2281 (XML_StartElementHandler) expat_start_handler,
2282 (XML_EndElementHandler) expat_end_handler
2283 );
2284 EXPAT(SetDefaultHandlerExpand)(
2285 self->parser,
2286 (XML_DefaultHandler) expat_default_handler
2287 );
2288 EXPAT(SetCharacterDataHandler)(
2289 self->parser,
2290 (XML_CharacterDataHandler) expat_data_handler
2291 );
2292 if (self->handle_comment)
2293 EXPAT(SetCommentHandler)(
2294 self->parser,
2295 (XML_CommentHandler) expat_comment_handler
2296 );
2297 if (self->handle_pi)
2298 EXPAT(SetProcessingInstructionHandler)(
2299 self->parser,
2300 (XML_ProcessingInstructionHandler) expat_pi_handler
2301 );
2302#if defined(Py_USING_UNICODE)
2303 EXPAT(SetUnknownEncodingHandler)(
2304 self->parser,
2305 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2306 );
2307#endif
2308
2309 ALLOC(sizeof(XMLParserObject), "create expatparser");
2310
2311 return (PyObject*) self;
2312}
2313
2314static void
2315xmlparser_dealloc(XMLParserObject* self)
2316{
2317 EXPAT(ParserFree)(self->parser);
2318
2319 Py_XDECREF(self->handle_pi);
2320 Py_XDECREF(self->handle_comment);
2321 Py_XDECREF(self->handle_end);
2322 Py_XDECREF(self->handle_data);
2323 Py_XDECREF(self->handle_start);
2324 Py_XDECREF(self->handle_xml);
2325
2326 Py_DECREF(self->target);
2327 Py_DECREF(self->entity);
2328 Py_DECREF(self->names);
2329
2330 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2331
2332 PyObject_Del(self);
2333}
2334
2335/* -------------------------------------------------------------------- */
2336/* methods (in alphabetical order) */
2337
2338LOCAL(PyObject*)
2339expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2340{
2341 int ok;
2342
2343 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2344
2345 if (PyErr_Occurred())
2346 return NULL;
2347
2348 if (!ok) {
2349 PyErr_Format(
2350 PyExc_SyntaxError, "%s: line %d, column %d",
2351 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2352 EXPAT(GetErrorLineNumber)(self->parser),
2353 EXPAT(GetErrorColumnNumber)(self->parser)
2354 );
2355 return NULL;
2356 }
2357
2358 Py_RETURN_NONE;
2359}
2360
2361static PyObject*
2362xmlparser_close(XMLParserObject* self, PyObject* args)
2363{
2364 /* end feeding data to parser */
2365
2366 PyObject* res;
2367 if (!PyArg_ParseTuple(args, ":close"))
2368 return NULL;
2369
2370 res = expat_parse(self, "", 0, 1);
2371
2372 if (res && TreeBuilder_CheckExact(self->target)) {
2373 Py_DECREF(res);
2374 return treebuilder_done((TreeBuilderObject*) self->target);
2375 }
2376
2377 return res;
2378}
2379
2380static PyObject*
2381xmlparser_feed(XMLParserObject* self, PyObject* args)
2382{
2383 /* feed data to parser */
2384
2385 char* data;
2386 int data_len;
2387 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2388 return NULL;
2389
2390 return expat_parse(self, data, data_len, 0);
2391}
2392
2393static PyObject*
2394xmlparser_parse(XMLParserObject* self, PyObject* args)
2395{
2396 /* (internal) parse until end of input stream */
2397
2398 PyObject* reader;
2399 PyObject* buffer;
2400 PyObject* res;
2401
2402 PyObject* fileobj;
2403 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2404 return NULL;
2405
2406 reader = PyObject_GetAttrString(fileobj, "read");
2407 if (!reader)
2408 return NULL;
2409
2410 /* read from open file object */
2411 for (;;) {
2412
2413 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2414
2415 if (!buffer) {
2416 /* read failed (e.g. due to KeyboardInterrupt) */
2417 Py_DECREF(reader);
2418 return NULL;
2419 }
2420
2421 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2422 Py_DECREF(buffer);
2423 break;
2424 }
2425
2426 res = expat_parse(
2427 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2428 );
2429
2430 Py_DECREF(buffer);
2431
2432 if (!res) {
2433 Py_DECREF(reader);
2434 return NULL;
2435 }
2436 Py_DECREF(res);
2437
2438 }
2439
2440 Py_DECREF(reader);
2441
2442 res = expat_parse(self, "", 0, 1);
2443
2444 if (res && TreeBuilder_CheckExact(self->target)) {
2445 Py_DECREF(res);
2446 return treebuilder_done((TreeBuilderObject*) self->target);
2447 }
2448
2449 return res;
2450}
2451
2452static PyObject*
2453xmlparser_setevents(XMLParserObject* self, PyObject* args)
2454{
2455 /* activate element event reporting */
2456
2457 int i;
2458 TreeBuilderObject* target;
2459
2460 PyObject* events; /* event collector */
2461 PyObject* event_set = Py_None;
2462 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2463 &event_set))
2464 return NULL;
2465
2466 if (!TreeBuilder_CheckExact(self->target)) {
2467 PyErr_SetString(
2468 PyExc_TypeError,
2469 "event handling only supported for cElementTree.Treebuilder "
2470 "targets"
2471 );
2472 return NULL;
2473 }
2474
2475 target = (TreeBuilderObject*) self->target;
2476
2477 Py_INCREF(events);
2478 Py_XDECREF(target->events);
2479 target->events = events;
2480
2481 /* clear out existing events */
2482 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2483 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2484 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2485 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2486
2487 if (event_set == Py_None) {
2488 /* default is "end" only */
2489 target->end_event_obj = PyString_FromString("end");
2490 Py_RETURN_NONE;
2491 }
2492
2493 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2494 goto error;
2495
2496 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2497 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2498 char* event;
2499 if (!PyString_Check(item))
2500 goto error;
2501 event = PyString_AS_STRING(item);
2502 if (strcmp(event, "start") == 0) {
2503 Py_INCREF(item);
2504 target->start_event_obj = item;
2505 } else if (strcmp(event, "end") == 0) {
2506 Py_INCREF(item);
2507 Py_XDECREF(target->end_event_obj);
2508 target->end_event_obj = item;
2509 } else if (strcmp(event, "start-ns") == 0) {
2510 Py_INCREF(item);
2511 Py_XDECREF(target->start_ns_event_obj);
2512 target->start_ns_event_obj = item;
2513 EXPAT(SetNamespaceDeclHandler)(
2514 self->parser,
2515 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2516 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2517 );
2518 } else if (strcmp(event, "end-ns") == 0) {
2519 Py_INCREF(item);
2520 Py_XDECREF(target->end_ns_event_obj);
2521 target->end_ns_event_obj = item;
2522 EXPAT(SetNamespaceDeclHandler)(
2523 self->parser,
2524 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2525 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2526 );
2527 } else {
2528 PyErr_Format(
2529 PyExc_ValueError,
2530 "unknown event '%s'", event
2531 );
2532 return NULL;
2533 }
2534 }
2535
2536 Py_RETURN_NONE;
2537
2538 error:
2539 PyErr_SetString(
2540 PyExc_TypeError,
2541 "invalid event tuple"
2542 );
2543 return NULL;
2544}
2545
2546static PyMethodDef xmlparser_methods[] = {
2547 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2548 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2549 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2550 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2551 {NULL, NULL}
2552};
2553
2554static PyObject*
2555xmlparser_getattr(XMLParserObject* self, char* name)
2556{
2557 PyObject* res;
2558
2559 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2560 if (res)
2561 return res;
2562
2563 PyErr_Clear();
2564
2565 if (strcmp(name, "entity") == 0)
2566 res = self->entity;
2567 else if (strcmp(name, "target") == 0)
2568 res = self->target;
2569 else if (strcmp(name, "version") == 0) {
2570 char buffer[100];
2571 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2572 XML_MINOR_VERSION, XML_MICRO_VERSION);
2573 return PyString_FromString(buffer);
2574 } else {
2575 PyErr_SetString(PyExc_AttributeError, name);
2576 return NULL;
2577 }
2578
2579 Py_INCREF(res);
2580 return res;
2581}
2582
Neal Norwitz227b5332006-03-22 09:28:35 +00002583static PyTypeObject XMLParser_Type = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002584 PyObject_HEAD_INIT(NULL)
2585 0, "XMLParser", sizeof(XMLParserObject), 0,
2586 /* methods */
2587 (destructor)xmlparser_dealloc, /* tp_dealloc */
2588 0, /* tp_print */
2589 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2590};
2591
2592#endif
2593
2594/* ==================================================================== */
2595/* python module interface */
2596
2597static PyMethodDef _functions[] = {
2598 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2599 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2600 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2601#if defined(USE_EXPAT)
2602 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2603 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2604#endif
2605 {NULL, NULL}
2606};
2607
2608DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002609init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610{
2611 PyObject* m;
2612 PyObject* g;
2613 char* bootstrap;
2614#if defined(USE_PYEXPAT_CAPI)
2615 struct PyExpat_CAPI* capi;
2616#endif
2617
2618 /* Patch object type */
2619 Element_Type.ob_type = TreeBuilder_Type.ob_type = &PyType_Type;
2620#if defined(USE_EXPAT)
2621 XMLParser_Type.ob_type = &PyType_Type;
2622#endif
2623
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002624 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002625 if (!m)
2626 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002627
2628 /* python glue code */
2629
2630 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002631 if (!g)
2632 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002633
2634 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2635
2636 bootstrap = (
2637
2638#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2639 "from __future__ import generators\n" /* enable yield under 2.2 */
2640#endif
2641
2642 "from copy import copy, deepcopy\n"
2643
2644 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002645 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002646 "except ImportError:\n"
2647 " import ElementTree\n"
2648 "ET = ElementTree\n"
2649 "del ElementTree\n"
2650
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002651 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002652
2653 "try:\n" /* check if copy works as is */
2654 " copy(cElementTree.Element('x'))\n"
2655 "except:\n"
2656 " def copyelement(elem):\n"
2657 " return elem\n"
2658
2659 "def Comment(text=None):\n" /* public */
2660 " element = cElementTree.Element(ET.Comment)\n"
2661 " element.text = text\n"
2662 " return element\n"
2663 "cElementTree.Comment = Comment\n"
2664
2665 "class ElementTree(ET.ElementTree):\n" /* public */
2666 " def parse(self, source, parser=None):\n"
2667 " if not hasattr(source, 'read'):\n"
2668 " source = open(source, 'rb')\n"
2669 " if parser is not None:\n"
2670 " while 1:\n"
2671 " data = source.read(65536)\n"
2672 " if not data:\n"
2673 " break\n"
2674 " parser.feed(data)\n"
2675 " self._root = parser.close()\n"
2676 " else:\n"
2677 " parser = cElementTree.XMLParser()\n"
2678 " self._root = parser._parse(source)\n"
2679 " return self._root\n"
2680 "cElementTree.ElementTree = ElementTree\n"
2681
2682 "def getiterator(node, tag=None):\n" /* helper */
2683 " if tag == '*':\n"
2684 " tag = None\n"
2685#if (PY_VERSION_HEX < 0x02020000)
2686 " nodes = []\n" /* 2.1 doesn't have yield */
2687 " if tag is None or node.tag == tag:\n"
2688 " nodes.append(node)\n"
2689 " for node in node:\n"
2690 " nodes.extend(getiterator(node, tag))\n"
2691 " return nodes\n"
2692#else
2693 " if tag is None or node.tag == tag:\n"
2694 " yield node\n"
2695 " for node in node:\n"
2696 " for node in getiterator(node, tag):\n"
2697 " yield node\n"
2698#endif
2699
2700 "def parse(source, parser=None):\n" /* public */
2701 " tree = ElementTree()\n"
2702 " tree.parse(source, parser)\n"
2703 " return tree\n"
2704 "cElementTree.parse = parse\n"
2705
2706#if (PY_VERSION_HEX < 0x02020000)
2707 "if hasattr(ET, 'iterparse'):\n"
2708 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2709#else
2710 "class iterparse(object):\n"
2711 " root = None\n"
2712 " def __init__(self, file, events=None):\n"
2713 " if not hasattr(file, 'read'):\n"
2714 " file = open(file, 'rb')\n"
2715 " self._file = file\n"
2716 " self._events = events\n"
2717 " def __iter__(self):\n"
2718 " events = []\n"
2719 " b = cElementTree.TreeBuilder()\n"
2720 " p = cElementTree.XMLParser(b)\n"
2721 " p._setevents(events, self._events)\n"
2722 " while 1:\n"
2723 " data = self._file.read(16384)\n"
2724 " if not data:\n"
2725 " break\n"
2726 " p.feed(data)\n"
2727 " for event in events:\n"
2728 " yield event\n"
2729 " del events[:]\n"
2730 " root = p.close()\n"
2731 " for event in events:\n"
2732 " yield event\n"
2733 " self.root = root\n"
2734 "cElementTree.iterparse = iterparse\n"
2735#endif
2736
2737 "def PI(target, text=None):\n" /* public */
2738 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2739 " element.text = target\n"
2740 " if text:\n"
2741 " element.text = element.text + ' ' + text\n"
2742 " return element\n"
2743
2744 " elem = cElementTree.Element(ET.PI)\n"
2745 " elem.text = text\n"
2746 " return elem\n"
2747 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2748
2749 "def XML(text):\n" /* public */
2750 " parser = cElementTree.XMLParser()\n"
2751 " parser.feed(text)\n"
2752 " return parser.close()\n"
2753 "cElementTree.XML = cElementTree.fromstring = XML\n"
2754
2755 "def XMLID(text):\n" /* public */
2756 " tree = XML(text)\n"
2757 " ids = {}\n"
2758 " for elem in tree.getiterator():\n"
2759 " id = elem.get('id')\n"
2760 " if id:\n"
2761 " ids[id] = elem\n"
2762 " return tree, ids\n"
2763 "cElementTree.XMLID = XMLID\n"
2764
2765 "cElementTree.dump = ET.dump\n"
2766 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2767 "cElementTree.iselement = ET.iselement\n"
2768 "cElementTree.QName = ET.QName\n"
2769 "cElementTree.tostring = ET.tostring\n"
2770 "cElementTree.VERSION = '" VERSION "'\n"
2771 "cElementTree.__version__ = '" VERSION "'\n"
2772 "cElementTree.XMLParserError = SyntaxError\n"
2773
2774 );
2775
2776 PyRun_String(bootstrap, Py_file_input, g, NULL);
2777
2778 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2779
2780 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2781 if (elementtree_copyelement_obj) {
2782 /* reduce hack needed; enable reduce method */
2783 PyMethodDef* mp;
2784 for (mp = element_methods; mp->ml_name; mp++)
2785 if (mp->ml_meth == (PyCFunction) element_reduce) {
2786 mp->ml_name = "__reduce__";
2787 break;
2788 }
2789 } else
2790 PyErr_Clear();
2791 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2792 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2793
2794#if defined(USE_PYEXPAT_CAPI)
2795 /* link against pyexpat, if possible */
2796 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2797 if (capi &&
2798 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2799 capi->size <= sizeof(*expat_capi) &&
2800 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2801 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2802 capi->MICRO_VERSION == XML_MICRO_VERSION)
2803 expat_capi = capi;
2804 else
2805 expat_capi = NULL;
2806#endif
2807
2808}