blob: d237cbb75b6efb81ae85d260ff24566845856a01 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000036 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000038 *
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000039 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000041 *
42 * info@pythonware.com
43 * http://www.pythonware.com
44 */
45
Fredrik Lundh6d52b552005-12-16 22:06:43 +000046/* Licensed to PSF under a Contributor Agreement. */
47/* See http://www.python.org/2.4/license for licensing details. */
48
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000049#include "Python.h"
50
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000051#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
53/* -------------------------------------------------------------------- */
54/* configuration */
55
56/* Leave defined to include the expat-based XMLParser type */
57#define USE_EXPAT
58
/* Define to route all expat calls via pyexpat's embedded expat library */
60/* #define USE_PYEXPAT_CAPI */
61
62/* An element can hold this many children without extra memory
63 allocations. */
64#define STATIC_CHILDREN 4
65
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
70
71/* Also note that pymalloc always allocates blocks in multiples of
72 eight bytes. For the current version of cElementTree, this means
73 that the number of children should be an even number, at least on
74 32-bit platforms. */
75
76/* -------------------------------------------------------------------- */
77
/* Optional allocation tracing.  Change "#if 0" to "#if 1" to keep a
   running byte count in `memory` and print it on every ALLOC/RELEASE;
   otherwise both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
88
/* compiler tweaks: LOCAL(type) introduces a file-private helper.  With
   MSVC the helper is additionally declared __inline and __fastcall. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
95
/* Tagged-pointer helpers.  The lowest bit of the text and tail pointers
   carries a 'join' flag, so every use of those fields as an object
   pointer must go through JOIN_OBJ.  See the comments in the
   ElementObject definition for more info.

   JOIN_OBJ(p)        the object pointer with the flag bit cleared
   JOIN_GET(p)        the flag bit (0 or 1)
   JOIN_SET(p, flag)  the object pointer of p combined with `flag` */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
103
104/* glue functions (see the init function for details) */
105static PyObject* elementtree_copyelement_obj;
106static PyObject* elementtree_deepcopy_obj;
107static PyObject* elementtree_getiterator_obj;
108static PyObject* elementpath_obj;
109
110/* helpers */
111
112LOCAL(PyObject*)
113deepcopy(PyObject* object, PyObject* memo)
114{
115 /* do a deep copy of the given object */
116
117 PyObject* args;
118 PyObject* result;
119
120 if (!elementtree_deepcopy_obj) {
121 PyErr_SetString(
122 PyExc_RuntimeError,
123 "deepcopy helper not found"
124 );
125 return NULL;
126 }
127
128 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000129 if (!args)
130 return NULL;
131
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
133 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
134
135 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
136
137 Py_DECREF(args);
138
139 return result;
140}
141
142LOCAL(PyObject*)
143list_join(PyObject* list)
144{
145 /* join list elements (destroying the list in the process) */
146
147 PyObject* joiner;
148 PyObject* function;
149 PyObject* args;
150 PyObject* result;
151
152 switch (PyList_GET_SIZE(list)) {
153 case 0:
154 Py_DECREF(list);
155 return PyString_FromString("");
156 case 1:
157 result = PyList_GET_ITEM(list, 0);
158 Py_INCREF(result);
159 Py_DECREF(list);
160 return result;
161 }
162
163 /* two or more elements: slice out a suitable separator from the
164 first member, and use that to join the entire list */
165
166 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
167 if (!joiner)
168 return NULL;
169
170 function = PyObject_GetAttrString(joiner, "join");
171 if (!function) {
172 Py_DECREF(joiner);
173 return NULL;
174 }
175
176 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000177 if (!args)
178 return NULL;
179
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000180 PyTuple_SET_ITEM(args, 0, list);
181
182 result = PyObject_CallObject(function, args);
183
184 Py_DECREF(args); /* also removes list */
185 Py_DECREF(function);
186 Py_DECREF(joiner);
187
188 return result;
189}
190
191#if (PY_VERSION_HEX < 0x02020000)
192LOCAL(int)
193PyDict_Update(PyObject* dict, PyObject* other)
194{
195 /* PyDict_Update emulation for 2.1 and earlier */
196
197 PyObject* res;
198
199 res = PyObject_CallMethod(dict, "update", "O", other);
200 if (!res)
201 return -1;
202
203 Py_DECREF(res);
204 return 0;
205}
206#endif
207
208/* -------------------------------------------------------------------- */
209/* the element type */
210
211typedef struct {
212
213 /* attributes (a dictionary object), or None if no attributes */
214 PyObject* attrib;
215
216 /* child elements */
217 int length; /* actual number of items */
218 int allocated; /* allocated items */
219
220 /* this either points to _children or to a malloced buffer */
221 PyObject* *children;
222
223 PyObject* _children[STATIC_CHILDREN];
224
225} ElementObjectExtra;
226
227typedef struct {
228 PyObject_HEAD
229
230 /* element tag (a string). */
231 PyObject* tag;
232
233 /* text before first child. note that this is a tagged pointer;
234 use JOIN_OBJ to get the object pointer. the join flag is used
235 to distinguish lists created by the tree builder from lists
236 assigned to the attribute by application code; the former
237 should be joined before being returned to the user, the latter
238 should be left intact. */
239 PyObject* text;
240
241 /* text after this element, in parent. note that this is a tagged
242 pointer; use JOIN_OBJ to get the object pointer. */
243 PyObject* tail;
244
245 ElementObjectExtra* extra;
246
247} ElementObject;
248
Neal Norwitz227b5332006-03-22 09:28:35 +0000249static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000250
Christian Heimes90aa7642007-12-19 02:45:37 +0000251#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000252
253/* -------------------------------------------------------------------- */
254/* element constructor and destructor */
255
256LOCAL(int)
257element_new_extra(ElementObject* self, PyObject* attrib)
258{
259 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
260 if (!self->extra)
261 return -1;
262
263 if (!attrib)
264 attrib = Py_None;
265
266 Py_INCREF(attrib);
267 self->extra->attrib = attrib;
268
269 self->extra->length = 0;
270 self->extra->allocated = STATIC_CHILDREN;
271 self->extra->children = self->extra->_children;
272
273 return 0;
274}
275
276LOCAL(void)
277element_dealloc_extra(ElementObject* self)
278{
279 int i;
280
281 Py_DECREF(self->extra->attrib);
282
283 for (i = 0; i < self->extra->length; i++)
284 Py_DECREF(self->extra->children[i]);
285
286 if (self->extra->children != self->extra->_children)
287 PyObject_Free(self->extra->children);
288
289 PyObject_Free(self->extra);
290}
291
292LOCAL(PyObject*)
293element_new(PyObject* tag, PyObject* attrib)
294{
295 ElementObject* self;
296
297 self = PyObject_New(ElementObject, &Element_Type);
298 if (self == NULL)
299 return NULL;
300
301 /* use None for empty dictionaries */
302 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
303 attrib = Py_None;
304
305 self->extra = NULL;
306
307 if (attrib != Py_None) {
308
Thomas Wouters477c8d52006-05-27 19:21:47 +0000309 if (element_new_extra(self, attrib) < 0) {
310 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000312 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000313
314 self->extra->length = 0;
315 self->extra->allocated = STATIC_CHILDREN;
316 self->extra->children = self->extra->_children;
317
318 }
319
320 Py_INCREF(tag);
321 self->tag = tag;
322
323 Py_INCREF(Py_None);
324 self->text = Py_None;
325
326 Py_INCREF(Py_None);
327 self->tail = Py_None;
328
329 ALLOC(sizeof(ElementObject), "create element");
330
331 return (PyObject*) self;
332}
333
334LOCAL(int)
335element_resize(ElementObject* self, int extra)
336{
337 int size;
338 PyObject* *children;
339
340 /* make sure self->children can hold the given number of extra
341 elements. set an exception and return -1 if allocation failed */
342
343 if (!self->extra)
344 element_new_extra(self, NULL);
345
346 size = self->extra->length + extra;
347
348 if (size > self->extra->allocated) {
349 /* use Python 2.4's list growth strategy */
350 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000351 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
352 * which needs at least 4 bytes.
353 * Although it's a false alarm always assume at least one child to
354 * be safe.
355 */
356 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000357 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000358 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
359 * "children", which needs at least 4 bytes. Although it's a
360 * false alarm always assume at least one child to be safe.
361 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000362 children = PyObject_Realloc(self->extra->children,
363 size * sizeof(PyObject*));
364 if (!children)
365 goto nomemory;
366 } else {
367 children = PyObject_Malloc(size * sizeof(PyObject*));
368 if (!children)
369 goto nomemory;
370 /* copy existing children from static area to malloc buffer */
371 memcpy(children, self->extra->children,
372 self->extra->length * sizeof(PyObject*));
373 }
374 self->extra->children = children;
375 self->extra->allocated = size;
376 }
377
378 return 0;
379
380 nomemory:
381 PyErr_NoMemory();
382 return -1;
383}
384
385LOCAL(int)
386element_add_subelement(ElementObject* self, PyObject* element)
387{
388 /* add a child element to a parent */
389
390 if (element_resize(self, 1) < 0)
391 return -1;
392
393 Py_INCREF(element);
394 self->extra->children[self->extra->length] = element;
395
396 self->extra->length++;
397
398 return 0;
399}
400
401LOCAL(PyObject*)
402element_get_attrib(ElementObject* self)
403{
404 /* return borrowed reference to attrib dictionary */
405 /* note: this function assumes that the extra section exists */
406
407 PyObject* res = self->extra->attrib;
408
409 if (res == Py_None) {
410 /* create missing dictionary */
411 res = PyDict_New();
412 if (!res)
413 return NULL;
414 self->extra->attrib = res;
415 }
416
417 return res;
418}
419
420LOCAL(PyObject*)
421element_get_text(ElementObject* self)
422{
423 /* return borrowed reference to text attribute */
424
425 PyObject* res = self->text;
426
427 if (JOIN_GET(res)) {
428 res = JOIN_OBJ(res);
429 if (PyList_CheckExact(res)) {
430 res = list_join(res);
431 if (!res)
432 return NULL;
433 self->text = res;
434 }
435 }
436
437 return res;
438}
439
440LOCAL(PyObject*)
441element_get_tail(ElementObject* self)
442{
443 /* return borrowed reference to text attribute */
444
445 PyObject* res = self->tail;
446
447 if (JOIN_GET(res)) {
448 res = JOIN_OBJ(res);
449 if (PyList_CheckExact(res)) {
450 res = list_join(res);
451 if (!res)
452 return NULL;
453 self->tail = res;
454 }
455 }
456
457 return res;
458}
459
460static PyObject*
461element(PyObject* self, PyObject* args, PyObject* kw)
462{
463 PyObject* elem;
464
465 PyObject* tag;
466 PyObject* attrib = NULL;
467 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
468 &PyDict_Type, &attrib))
469 return NULL;
470
471 if (attrib || kw) {
472 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
473 if (!attrib)
474 return NULL;
475 if (kw)
476 PyDict_Update(attrib, kw);
477 } else {
478 Py_INCREF(Py_None);
479 attrib = Py_None;
480 }
481
482 elem = element_new(tag, attrib);
483
484 Py_DECREF(attrib);
485
486 return elem;
487}
488
489static PyObject*
490subelement(PyObject* self, PyObject* args, PyObject* kw)
491{
492 PyObject* elem;
493
494 ElementObject* parent;
495 PyObject* tag;
496 PyObject* attrib = NULL;
497 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
498 &Element_Type, &parent, &tag,
499 &PyDict_Type, &attrib))
500 return NULL;
501
502 if (attrib || kw) {
503 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
504 if (!attrib)
505 return NULL;
506 if (kw)
507 PyDict_Update(attrib, kw);
508 } else {
509 Py_INCREF(Py_None);
510 attrib = Py_None;
511 }
512
513 elem = element_new(tag, attrib);
514
515 Py_DECREF(attrib);
516
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000517 if (element_add_subelement(parent, elem) < 0) {
518 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000519 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000520 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000521
522 return elem;
523}
524
525static void
526element_dealloc(ElementObject* self)
527{
528 if (self->extra)
529 element_dealloc_extra(self);
530
531 /* discard attributes */
532 Py_DECREF(self->tag);
533 Py_DECREF(JOIN_OBJ(self->text));
534 Py_DECREF(JOIN_OBJ(self->tail));
535
536 RELEASE(sizeof(ElementObject), "destroy element");
537
538 PyObject_Del(self);
539}
540
541/* -------------------------------------------------------------------- */
542/* methods (in alphabetical order) */
543
544static PyObject*
545element_append(ElementObject* self, PyObject* args)
546{
547 PyObject* element;
548 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
549 return NULL;
550
551 if (element_add_subelement(self, element) < 0)
552 return NULL;
553
554 Py_RETURN_NONE;
555}
556
557static PyObject*
558element_clear(ElementObject* self, PyObject* args)
559{
560 if (!PyArg_ParseTuple(args, ":clear"))
561 return NULL;
562
563 if (self->extra) {
564 element_dealloc_extra(self);
565 self->extra = NULL;
566 }
567
568 Py_INCREF(Py_None);
569 Py_DECREF(JOIN_OBJ(self->text));
570 self->text = Py_None;
571
572 Py_INCREF(Py_None);
573 Py_DECREF(JOIN_OBJ(self->tail));
574 self->tail = Py_None;
575
576 Py_RETURN_NONE;
577}
578
579static PyObject*
580element_copy(ElementObject* self, PyObject* args)
581{
582 int i;
583 ElementObject* element;
584
585 if (!PyArg_ParseTuple(args, ":__copy__"))
586 return NULL;
587
588 element = (ElementObject*) element_new(
589 self->tag, (self->extra) ? self->extra->attrib : Py_None
590 );
591 if (!element)
592 return NULL;
593
594 Py_DECREF(JOIN_OBJ(element->text));
595 element->text = self->text;
596 Py_INCREF(JOIN_OBJ(element->text));
597
598 Py_DECREF(JOIN_OBJ(element->tail));
599 element->tail = self->tail;
600 Py_INCREF(JOIN_OBJ(element->tail));
601
602 if (self->extra) {
603
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000604 if (element_resize(element, self->extra->length) < 0) {
605 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000606 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000607 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000608
609 for (i = 0; i < self->extra->length; i++) {
610 Py_INCREF(self->extra->children[i]);
611 element->extra->children[i] = self->extra->children[i];
612 }
613
614 element->extra->length = self->extra->length;
615
616 }
617
618 return (PyObject*) element;
619}
620
621static PyObject*
622element_deepcopy(ElementObject* self, PyObject* args)
623{
624 int i;
625 ElementObject* element;
626 PyObject* tag;
627 PyObject* attrib;
628 PyObject* text;
629 PyObject* tail;
630 PyObject* id;
631
632 PyObject* memo;
633 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
634 return NULL;
635
636 tag = deepcopy(self->tag, memo);
637 if (!tag)
638 return NULL;
639
640 if (self->extra) {
641 attrib = deepcopy(self->extra->attrib, memo);
642 if (!attrib) {
643 Py_DECREF(tag);
644 return NULL;
645 }
646 } else {
647 Py_INCREF(Py_None);
648 attrib = Py_None;
649 }
650
651 element = (ElementObject*) element_new(tag, attrib);
652
653 Py_DECREF(tag);
654 Py_DECREF(attrib);
655
656 if (!element)
657 return NULL;
658
659 text = deepcopy(JOIN_OBJ(self->text), memo);
660 if (!text)
661 goto error;
662 Py_DECREF(element->text);
663 element->text = JOIN_SET(text, JOIN_GET(self->text));
664
665 tail = deepcopy(JOIN_OBJ(self->tail), memo);
666 if (!tail)
667 goto error;
668 Py_DECREF(element->tail);
669 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
670
671 if (self->extra) {
672
673 if (element_resize(element, self->extra->length) < 0)
674 goto error;
675
676 for (i = 0; i < self->extra->length; i++) {
677 PyObject* child = deepcopy(self->extra->children[i], memo);
678 if (!child) {
679 element->extra->length = i;
680 goto error;
681 }
682 element->extra->children[i] = child;
683 }
684
685 element->extra->length = self->extra->length;
686
687 }
688
689 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000690 id = PyLong_FromLong((Py_uintptr_t) self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000691
692 i = PyDict_SetItem(memo, id, (PyObject*) element);
693
694 Py_DECREF(id);
695
696 if (i < 0)
697 goto error;
698
699 return (PyObject*) element;
700
701 error:
702 Py_DECREF(element);
703 return NULL;
704}
705
706LOCAL(int)
707checkpath(PyObject* tag)
708{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000709 Py_ssize_t i;
710 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000711
712 /* check if a tag contains an xpath character */
713
714#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
715
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716 if (PyUnicode_Check(tag)) {
717 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
718 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
719 if (p[i] == '{')
720 check = 0;
721 else if (p[i] == '}')
722 check = 1;
723 else if (check && PATHCHAR(p[i]))
724 return 1;
725 }
726 return 0;
727 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000728 if (PyString_Check(tag)) {
729 char *p = PyString_AS_STRING(tag);
730 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
731 if (p[i] == '{')
732 check = 0;
733 else if (p[i] == '}')
734 check = 1;
735 else if (check && PATHCHAR(p[i]))
736 return 1;
737 }
738 return 0;
739 }
740
741 return 1; /* unknown type; might be path expression */
742}
743
744static PyObject*
745element_find(ElementObject* self, PyObject* args)
746{
747 int i;
748
749 PyObject* tag;
750 if (!PyArg_ParseTuple(args, "O:find", &tag))
751 return NULL;
752
753 if (checkpath(tag))
754 return PyObject_CallMethod(
755 elementpath_obj, "find", "OO", self, tag
756 );
757
758 if (!self->extra)
759 Py_RETURN_NONE;
760
761 for (i = 0; i < self->extra->length; i++) {
762 PyObject* item = self->extra->children[i];
763 if (Element_CheckExact(item) &&
764 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
765 Py_INCREF(item);
766 return item;
767 }
768 }
769
770 Py_RETURN_NONE;
771}
772
773static PyObject*
774element_findtext(ElementObject* self, PyObject* args)
775{
776 int i;
777
778 PyObject* tag;
779 PyObject* default_value = Py_None;
780 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
781 return NULL;
782
783 if (checkpath(tag))
784 return PyObject_CallMethod(
785 elementpath_obj, "findtext", "OOO", self, tag, default_value
786 );
787
788 if (!self->extra) {
789 Py_INCREF(default_value);
790 return default_value;
791 }
792
793 for (i = 0; i < self->extra->length; i++) {
794 ElementObject* item = (ElementObject*) self->extra->children[i];
795 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
796 PyObject* text = element_get_text(item);
797 if (text == Py_None)
798 return PyString_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000799 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000800 return text;
801 }
802 }
803
804 Py_INCREF(default_value);
805 return default_value;
806}
807
808static PyObject*
809element_findall(ElementObject* self, PyObject* args)
810{
811 int i;
812 PyObject* out;
813
814 PyObject* tag;
815 if (!PyArg_ParseTuple(args, "O:findall", &tag))
816 return NULL;
817
818 if (checkpath(tag))
819 return PyObject_CallMethod(
820 elementpath_obj, "findall", "OO", self, tag
821 );
822
823 out = PyList_New(0);
824 if (!out)
825 return NULL;
826
827 if (!self->extra)
828 return out;
829
830 for (i = 0; i < self->extra->length; i++) {
831 PyObject* item = self->extra->children[i];
832 if (Element_CheckExact(item) &&
833 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
834 if (PyList_Append(out, item) < 0) {
835 Py_DECREF(out);
836 return NULL;
837 }
838 }
839 }
840
841 return out;
842}
843
844static PyObject*
845element_get(ElementObject* self, PyObject* args)
846{
847 PyObject* value;
848
849 PyObject* key;
850 PyObject* default_value = Py_None;
851 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
852 return NULL;
853
854 if (!self->extra || self->extra->attrib == Py_None)
855 value = default_value;
856 else {
857 value = PyDict_GetItem(self->extra->attrib, key);
858 if (!value)
859 value = default_value;
860 }
861
862 Py_INCREF(value);
863 return value;
864}
865
866static PyObject*
867element_getchildren(ElementObject* self, PyObject* args)
868{
869 int i;
870 PyObject* list;
871
872 if (!PyArg_ParseTuple(args, ":getchildren"))
873 return NULL;
874
875 if (!self->extra)
876 return PyList_New(0);
877
878 list = PyList_New(self->extra->length);
879 if (!list)
880 return NULL;
881
882 for (i = 0; i < self->extra->length; i++) {
883 PyObject* item = self->extra->children[i];
884 Py_INCREF(item);
885 PyList_SET_ITEM(list, i, item);
886 }
887
888 return list;
889}
890
891static PyObject*
892element_getiterator(ElementObject* self, PyObject* args)
893{
894 PyObject* result;
895
896 PyObject* tag = Py_None;
897 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
898 return NULL;
899
900 if (!elementtree_getiterator_obj) {
901 PyErr_SetString(
902 PyExc_RuntimeError,
903 "getiterator helper not found"
904 );
905 return NULL;
906 }
907
908 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000909 if (!args)
910 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000911
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000912 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
913 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
914
915 result = PyObject_CallObject(elementtree_getiterator_obj, args);
916
917 Py_DECREF(args);
918
919 return result;
920}
921
922static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000923element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000924{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000925 ElementObject* self = (ElementObject*) self_;
926
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000927 if (!self->extra || index < 0 || index >= self->extra->length) {
928 PyErr_SetString(
929 PyExc_IndexError,
930 "child index out of range"
931 );
932 return NULL;
933 }
934
935 Py_INCREF(self->extra->children[index]);
936 return self->extra->children[index];
937}
938
939static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000940element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000941{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000942 ElementObject* self = (ElementObject*) self_;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000943 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000944 PyObject* list;
945
946 if (!self->extra)
947 return PyList_New(0);
948
949 /* standard clamping */
950 if (start < 0)
951 start = 0;
952 if (end < 0)
953 end = 0;
954 if (end > self->extra->length)
955 end = self->extra->length;
956 if (start > end)
957 start = end;
958
959 list = PyList_New(end - start);
960 if (!list)
961 return NULL;
962
963 for (i = start; i < end; i++) {
964 PyObject* item = self->extra->children[i];
965 Py_INCREF(item);
966 PyList_SET_ITEM(list, i - start, item);
967 }
968
969 return list;
970}
971
972static PyObject*
973element_insert(ElementObject* self, PyObject* args)
974{
975 int i;
976
977 int index;
978 PyObject* element;
979 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
980 &Element_Type, &element))
981 return NULL;
982
983 if (!self->extra)
984 element_new_extra(self, NULL);
985
986 if (index < 0)
987 index = 0;
988 if (index > self->extra->length)
989 index = self->extra->length;
990
991 if (element_resize(self, 1) < 0)
992 return NULL;
993
994 for (i = self->extra->length; i > index; i--)
995 self->extra->children[i] = self->extra->children[i-1];
996
997 Py_INCREF(element);
998 self->extra->children[index] = element;
999
1000 self->extra->length++;
1001
1002 Py_RETURN_NONE;
1003}
1004
1005static PyObject*
1006element_items(ElementObject* self, PyObject* args)
1007{
1008 if (!PyArg_ParseTuple(args, ":items"))
1009 return NULL;
1010
1011 if (!self->extra || self->extra->attrib == Py_None)
1012 return PyList_New(0);
1013
1014 return PyDict_Items(self->extra->attrib);
1015}
1016
1017static PyObject*
1018element_keys(ElementObject* self, PyObject* args)
1019{
1020 if (!PyArg_ParseTuple(args, ":keys"))
1021 return NULL;
1022
1023 if (!self->extra || self->extra->attrib == Py_None)
1024 return PyList_New(0);
1025
1026 return PyDict_Keys(self->extra->attrib);
1027}
1028
Martin v. Löwis18e16552006-02-15 17:27:45 +00001029static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001030element_length(ElementObject* self)
1031{
1032 if (!self->extra)
1033 return 0;
1034
1035 return self->extra->length;
1036}
1037
1038static PyObject*
1039element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1040{
1041 PyObject* elem;
1042
1043 PyObject* tag;
1044 PyObject* attrib;
1045 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1046 return NULL;
1047
1048 attrib = PyDict_Copy(attrib);
1049 if (!attrib)
1050 return NULL;
1051
1052 elem = element_new(tag, attrib);
1053
1054 Py_DECREF(attrib);
1055
1056 return elem;
1057}
1058
1059static PyObject*
1060element_reduce(ElementObject* self, PyObject* args)
1061{
1062 if (!PyArg_ParseTuple(args, ":__reduce__"))
1063 return NULL;
1064
1065 /* Hack alert: This method is used to work around a __copy__
1066 problem on certain 2.3 and 2.4 versions. To save time and
1067 simplify the code, we create the copy in here, and use a dummy
1068 copyelement helper to trick the copy module into doing the
1069 right thing. */
1070
1071 if (!elementtree_copyelement_obj) {
1072 PyErr_SetString(
1073 PyExc_RuntimeError,
1074 "copyelement helper not found"
1075 );
1076 return NULL;
1077 }
1078
1079 return Py_BuildValue(
1080 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1081 );
1082}
1083
1084static PyObject*
1085element_remove(ElementObject* self, PyObject* args)
1086{
1087 int i;
1088
1089 PyObject* element;
1090 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1091 return NULL;
1092
1093 if (!self->extra) {
1094 /* element has no children, so raise exception */
1095 PyErr_SetString(
1096 PyExc_ValueError,
1097 "list.remove(x): x not in list"
1098 );
1099 return NULL;
1100 }
1101
1102 for (i = 0; i < self->extra->length; i++) {
1103 if (self->extra->children[i] == element)
1104 break;
1105 if (PyObject_Compare(self->extra->children[i], element) == 0)
1106 break;
1107 }
1108
1109 if (i == self->extra->length) {
1110 /* element is not in children, so raise exception */
1111 PyErr_SetString(
1112 PyExc_ValueError,
1113 "list.remove(x): x not in list"
1114 );
1115 return NULL;
1116 }
1117
1118 Py_DECREF(self->extra->children[i]);
1119
1120 self->extra->length--;
1121
1122 for (; i < self->extra->length; i++)
1123 self->extra->children[i] = self->extra->children[i+1];
1124
1125 Py_RETURN_NONE;
1126}
1127
1128static PyObject*
1129element_repr(ElementObject* self)
1130{
Walter Dörwald7569dfe2007-05-19 21:49:49 +00001131 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001132}
1133
1134static PyObject*
1135element_set(ElementObject* self, PyObject* args)
1136{
1137 PyObject* attrib;
1138
1139 PyObject* key;
1140 PyObject* value;
1141 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1142 return NULL;
1143
1144 if (!self->extra)
1145 element_new_extra(self, NULL);
1146
1147 attrib = element_get_attrib(self);
1148 if (!attrib)
1149 return NULL;
1150
1151 if (PyDict_SetItem(attrib, key, value) < 0)
1152 return NULL;
1153
1154 Py_RETURN_NONE;
1155}
1156
1157static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001158element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001159{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001160 ElementObject* self = (ElementObject*) self_;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001161 Py_ssize_t i, new, old;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001162 PyObject* recycle = NULL;
1163
1164 if (!self->extra)
1165 element_new_extra(self, NULL);
1166
1167 /* standard clamping */
1168 if (start < 0)
1169 start = 0;
1170 if (end < 0)
1171 end = 0;
1172 if (end > self->extra->length)
1173 end = self->extra->length;
1174 if (start > end)
1175 start = end;
1176
1177 old = end - start;
1178
1179 if (item == NULL)
1180 new = 0;
1181 else if (PyList_CheckExact(item)) {
1182 new = PyList_GET_SIZE(item);
1183 } else {
1184 /* FIXME: support arbitrary sequences? */
1185 PyErr_Format(
1186 PyExc_TypeError,
Christian Heimes90aa7642007-12-19 02:45:37 +00001187 "expected list, not \"%.200s\"", Py_TYPE(item)->tp_name
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001188 );
1189 return -1;
1190 }
1191
1192 if (old > 0) {
1193 /* to avoid recursive calls to this method (via decref), move
1194 old items to the recycle bin here, and get rid of them when
1195 we're done modifying the element */
1196 recycle = PyList_New(old);
1197 for (i = 0; i < old; i++)
1198 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1199 }
1200
1201 if (new < old) {
1202 /* delete slice */
1203 for (i = end; i < self->extra->length; i++)
1204 self->extra->children[i + new - old] = self->extra->children[i];
1205 } else if (new > old) {
1206 /* insert slice */
1207 if (element_resize(self, new - old) < 0)
1208 return -1;
1209 for (i = self->extra->length-1; i >= end; i--)
1210 self->extra->children[i + new - old] = self->extra->children[i];
1211 }
1212
1213 /* replace the slice */
1214 for (i = 0; i < new; i++) {
1215 PyObject* element = PyList_GET_ITEM(item, i);
1216 Py_INCREF(element);
1217 self->extra->children[i + start] = element;
1218 }
1219
1220 self->extra->length += new - old;
1221
1222 /* discard the recycle bin, and everything in it */
1223 Py_XDECREF(recycle);
1224
1225 return 0;
1226}
1227
1228static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001229element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001230{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001231 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001232 int i;
1233 PyObject* old;
1234
1235 if (!self->extra || index < 0 || index >= self->extra->length) {
1236 PyErr_SetString(
1237 PyExc_IndexError,
1238 "child assignment index out of range");
1239 return -1;
1240 }
1241
1242 old = self->extra->children[index];
1243
1244 if (item) {
1245 Py_INCREF(item);
1246 self->extra->children[index] = item;
1247 } else {
1248 self->extra->length--;
1249 for (i = index; i < self->extra->length; i++)
1250 self->extra->children[i] = self->extra->children[i+1];
1251 }
1252
1253 Py_DECREF(old);
1254
1255 return 0;
1256}
1257
1258static PyMethodDef element_methods[] = {
1259
1260 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1261
1262 {"get", (PyCFunction) element_get, METH_VARARGS},
1263 {"set", (PyCFunction) element_set, METH_VARARGS},
1264
1265 {"find", (PyCFunction) element_find, METH_VARARGS},
1266 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1267 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1268
1269 {"append", (PyCFunction) element_append, METH_VARARGS},
1270 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1271 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1272
1273 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1274 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1275
1276 {"items", (PyCFunction) element_items, METH_VARARGS},
1277 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1278
1279 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1280
1281 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1282 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1283
1284 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1285 C objects correctly, so we have to fake it using a __reduce__-
1286 based hack (see the element_reduce implementation above for
1287 details). */
1288
1289 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1290 using a runtime test to figure out if we need to fake things
1291 or now (see the init code below). The following entry is
1292 enabled only if the hack is needed. */
1293
1294 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1295
1296 {NULL, NULL}
1297};
1298
1299static PyObject*
1300element_getattr(ElementObject* self, char* name)
1301{
1302 PyObject* res;
1303
1304 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1305 if (res)
1306 return res;
1307
1308 PyErr_Clear();
1309
1310 if (strcmp(name, "tag") == 0)
1311 res = self->tag;
1312 else if (strcmp(name, "text") == 0)
1313 res = element_get_text(self);
1314 else if (strcmp(name, "tail") == 0) {
1315 res = element_get_tail(self);
1316 } else if (strcmp(name, "attrib") == 0) {
1317 if (!self->extra)
1318 element_new_extra(self, NULL);
1319 res = element_get_attrib(self);
1320 } else {
1321 PyErr_SetString(PyExc_AttributeError, name);
1322 return NULL;
1323 }
1324
1325 if (!res)
1326 return NULL;
1327
1328 Py_INCREF(res);
1329 return res;
1330}
1331
1332static int
1333element_setattr(ElementObject* self, const char* name, PyObject* value)
1334{
1335 if (value == NULL) {
1336 PyErr_SetString(
1337 PyExc_AttributeError,
1338 "can't delete element attributes"
1339 );
1340 return -1;
1341 }
1342
1343 if (strcmp(name, "tag") == 0) {
1344 Py_DECREF(self->tag);
1345 self->tag = value;
1346 Py_INCREF(self->tag);
1347 } else if (strcmp(name, "text") == 0) {
1348 Py_DECREF(JOIN_OBJ(self->text));
1349 self->text = value;
1350 Py_INCREF(self->text);
1351 } else if (strcmp(name, "tail") == 0) {
1352 Py_DECREF(JOIN_OBJ(self->tail));
1353 self->tail = value;
1354 Py_INCREF(self->tail);
1355 } else if (strcmp(name, "attrib") == 0) {
1356 if (!self->extra)
1357 element_new_extra(self, NULL);
1358 Py_DECREF(self->extra->attrib);
1359 self->extra->attrib = value;
1360 Py_INCREF(self->extra->attrib);
1361 } else {
1362 PyErr_SetString(PyExc_AttributeError, name);
1363 return -1;
1364 }
1365
1366 return 0;
1367}
1368
1369static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001370 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001371 0, /* sq_concat */
1372 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001373 element_getitem,
1374 element_getslice,
1375 element_setitem,
1376 element_setslice,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001377};
1378
Neal Norwitz227b5332006-03-22 09:28:35 +00001379static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001380 PyVarObject_HEAD_INIT(NULL, 0)
1381 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001382 /* methods */
1383 (destructor)element_dealloc, /* tp_dealloc */
1384 0, /* tp_print */
1385 (getattrfunc)element_getattr, /* tp_getattr */
1386 (setattrfunc)element_setattr, /* tp_setattr */
1387 0, /* tp_compare */
1388 (reprfunc)element_repr, /* tp_repr */
1389 0, /* tp_as_number */
1390 &element_as_sequence, /* tp_as_sequence */
1391};
1392
1393/* ==================================================================== */
1394/* the tree builder type */
1395
1396typedef struct {
1397 PyObject_HEAD
1398
1399 PyObject* root; /* root node (first created node) */
1400
1401 ElementObject* this; /* current node */
1402 ElementObject* last; /* most recently created node */
1403
1404 PyObject* data; /* data collector (string or list), or NULL */
1405
1406 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001407 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001408
1409 /* element tracing */
1410 PyObject* events; /* list of events, or NULL if not collecting */
1411 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1412 PyObject* end_event_obj;
1413 PyObject* start_ns_event_obj;
1414 PyObject* end_ns_event_obj;
1415
1416} TreeBuilderObject;
1417
Neal Norwitz227b5332006-03-22 09:28:35 +00001418static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001419
Christian Heimes90aa7642007-12-19 02:45:37 +00001420#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001421
1422/* -------------------------------------------------------------------- */
1423/* constructor and destructor */
1424
1425LOCAL(PyObject*)
1426treebuilder_new(void)
1427{
1428 TreeBuilderObject* self;
1429
1430 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1431 if (self == NULL)
1432 return NULL;
1433
1434 self->root = NULL;
1435
1436 Py_INCREF(Py_None);
1437 self->this = (ElementObject*) Py_None;
1438
1439 Py_INCREF(Py_None);
1440 self->last = (ElementObject*) Py_None;
1441
1442 self->data = NULL;
1443
1444 self->stack = PyList_New(20);
1445 self->index = 0;
1446
1447 self->events = NULL;
1448 self->start_event_obj = self->end_event_obj = NULL;
1449 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1450
1451 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1452
1453 return (PyObject*) self;
1454}
1455
1456static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001457treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001458{
1459 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1460 return NULL;
1461
1462 return treebuilder_new();
1463}
1464
1465static void
1466treebuilder_dealloc(TreeBuilderObject* self)
1467{
1468 Py_XDECREF(self->end_ns_event_obj);
1469 Py_XDECREF(self->start_ns_event_obj);
1470 Py_XDECREF(self->end_event_obj);
1471 Py_XDECREF(self->start_event_obj);
1472 Py_XDECREF(self->events);
1473 Py_DECREF(self->stack);
1474 Py_XDECREF(self->data);
1475 Py_DECREF(self->last);
1476 Py_DECREF(self->this);
1477 Py_XDECREF(self->root);
1478
1479 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1480
1481 PyObject_Del(self);
1482}
1483
1484/* -------------------------------------------------------------------- */
1485/* handlers */
1486
1487LOCAL(PyObject*)
1488treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1489 PyObject* standalone)
1490{
1491 Py_RETURN_NONE;
1492}
1493
1494LOCAL(PyObject*)
1495treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1496 PyObject* attrib)
1497{
1498 PyObject* node;
1499 PyObject* this;
1500
1501 if (self->data) {
1502 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001503 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001504 self->last->text = JOIN_SET(
1505 self->data, PyList_CheckExact(self->data)
1506 );
1507 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001508 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001509 self->last->tail = JOIN_SET(
1510 self->data, PyList_CheckExact(self->data)
1511 );
1512 }
1513 self->data = NULL;
1514 }
1515
1516 node = element_new(tag, attrib);
1517 if (!node)
1518 return NULL;
1519
1520 this = (PyObject*) self->this;
1521
1522 if (this != Py_None) {
1523 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001524 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001525 } else {
1526 if (self->root) {
1527 PyErr_SetString(
1528 PyExc_SyntaxError,
1529 "multiple elements on top level"
1530 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001531 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001532 }
1533 Py_INCREF(node);
1534 self->root = node;
1535 }
1536
1537 if (self->index < PyList_GET_SIZE(self->stack)) {
1538 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001539 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001540 Py_INCREF(this);
1541 } else {
1542 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001543 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001544 }
1545 self->index++;
1546
1547 Py_DECREF(this);
1548 Py_INCREF(node);
1549 self->this = (ElementObject*) node;
1550
1551 Py_DECREF(self->last);
1552 Py_INCREF(node);
1553 self->last = (ElementObject*) node;
1554
1555 if (self->start_event_obj) {
1556 PyObject* res;
1557 PyObject* action = self->start_event_obj;
1558 res = PyTuple_New(2);
1559 if (res) {
1560 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1561 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1562 PyList_Append(self->events, res);
1563 Py_DECREF(res);
1564 } else
1565 PyErr_Clear(); /* FIXME: propagate error */
1566 }
1567
1568 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001569
1570 error:
1571 Py_DECREF(node);
1572 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001573}
1574
1575LOCAL(PyObject*)
1576treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1577{
1578 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001579 if (self->last == (ElementObject*) Py_None) {
1580 /* ignore calls to data before the first call to start */
1581 Py_RETURN_NONE;
1582 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001583 /* store the first item as is */
1584 Py_INCREF(data); self->data = data;
1585 } else {
1586 /* more than one item; use a list to collect items */
Christian Heimes90aa7642007-12-19 02:45:37 +00001587 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001588 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1589 /* expat often generates single character data sections; handle
1590 the most common case by resizing the existing string... */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001591 Py_ssize_t size = PyString_GET_SIZE(self->data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001592 if (_PyString_Resize(&self->data, size + 1) < 0)
1593 return NULL;
1594 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1595 } else if (PyList_CheckExact(self->data)) {
1596 if (PyList_Append(self->data, data) < 0)
1597 return NULL;
1598 } else {
1599 PyObject* list = PyList_New(2);
1600 if (!list)
1601 return NULL;
1602 PyList_SET_ITEM(list, 0, self->data);
1603 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1604 self->data = list;
1605 }
1606 }
1607
1608 Py_RETURN_NONE;
1609}
1610
1611LOCAL(PyObject*)
1612treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1613{
1614 PyObject* item;
1615
1616 if (self->data) {
1617 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001618 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001619 self->last->text = JOIN_SET(
1620 self->data, PyList_CheckExact(self->data)
1621 );
1622 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001623 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001624 self->last->tail = JOIN_SET(
1625 self->data, PyList_CheckExact(self->data)
1626 );
1627 }
1628 self->data = NULL;
1629 }
1630
1631 if (self->index == 0) {
1632 PyErr_SetString(
1633 PyExc_IndexError,
1634 "pop from empty stack"
1635 );
1636 return NULL;
1637 }
1638
1639 self->index--;
1640
1641 item = PyList_GET_ITEM(self->stack, self->index);
1642 Py_INCREF(item);
1643
1644 Py_DECREF(self->last);
1645
1646 self->last = (ElementObject*) self->this;
1647 self->this = (ElementObject*) item;
1648
1649 if (self->end_event_obj) {
1650 PyObject* res;
1651 PyObject* action = self->end_event_obj;
1652 PyObject* node = (PyObject*) self->last;
1653 res = PyTuple_New(2);
1654 if (res) {
1655 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1656 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1657 PyList_Append(self->events, res);
1658 Py_DECREF(res);
1659 } else
1660 PyErr_Clear(); /* FIXME: propagate error */
1661 }
1662
1663 Py_INCREF(self->last);
1664 return (PyObject*) self->last;
1665}
1666
1667LOCAL(void)
1668treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1669 const char* prefix, const char *uri)
1670{
1671 PyObject* res;
1672 PyObject* action;
1673 PyObject* parcel;
1674
1675 if (!self->events)
1676 return;
1677
1678 if (start) {
1679 if (!self->start_ns_event_obj)
1680 return;
1681 action = self->start_ns_event_obj;
1682 /* FIXME: prefix and uri use utf-8 encoding! */
1683 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1684 if (!parcel)
1685 return;
1686 Py_INCREF(action);
1687 } else {
1688 if (!self->end_ns_event_obj)
1689 return;
1690 action = self->end_ns_event_obj;
1691 Py_INCREF(action);
1692 parcel = Py_None;
1693 Py_INCREF(parcel);
1694 }
1695
1696 res = PyTuple_New(2);
1697
1698 if (res) {
1699 PyTuple_SET_ITEM(res, 0, action);
1700 PyTuple_SET_ITEM(res, 1, parcel);
1701 PyList_Append(self->events, res);
1702 Py_DECREF(res);
1703 } else
1704 PyErr_Clear(); /* FIXME: propagate error */
1705}
1706
1707/* -------------------------------------------------------------------- */
1708/* methods (in alphabetical order) */
1709
1710static PyObject*
1711treebuilder_data(TreeBuilderObject* self, PyObject* args)
1712{
1713 PyObject* data;
1714 if (!PyArg_ParseTuple(args, "O:data", &data))
1715 return NULL;
1716
1717 return treebuilder_handle_data(self, data);
1718}
1719
1720static PyObject*
1721treebuilder_end(TreeBuilderObject* self, PyObject* args)
1722{
1723 PyObject* tag;
1724 if (!PyArg_ParseTuple(args, "O:end", &tag))
1725 return NULL;
1726
1727 return treebuilder_handle_end(self, tag);
1728}
1729
1730LOCAL(PyObject*)
1731treebuilder_done(TreeBuilderObject* self)
1732{
1733 PyObject* res;
1734
1735 /* FIXME: check stack size? */
1736
1737 if (self->root)
1738 res = self->root;
1739 else
1740 res = Py_None;
1741
1742 Py_INCREF(res);
1743 return res;
1744}
1745
1746static PyObject*
1747treebuilder_close(TreeBuilderObject* self, PyObject* args)
1748{
1749 if (!PyArg_ParseTuple(args, ":close"))
1750 return NULL;
1751
1752 return treebuilder_done(self);
1753}
1754
1755static PyObject*
1756treebuilder_start(TreeBuilderObject* self, PyObject* args)
1757{
1758 PyObject* tag;
1759 PyObject* attrib = Py_None;
1760 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1761 return NULL;
1762
1763 return treebuilder_handle_start(self, tag, attrib);
1764}
1765
1766static PyObject*
1767treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1768{
1769 PyObject* encoding;
1770 PyObject* standalone;
1771 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1772 return NULL;
1773
1774 return treebuilder_handle_xml(self, encoding, standalone);
1775}
1776
1777static PyMethodDef treebuilder_methods[] = {
1778 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1779 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1780 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1781 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1782 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1783 {NULL, NULL}
1784};
1785
1786static PyObject*
1787treebuilder_getattr(TreeBuilderObject* self, char* name)
1788{
1789 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1790}
1791
Neal Norwitz227b5332006-03-22 09:28:35 +00001792static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001793 PyVarObject_HEAD_INIT(NULL, 0)
1794 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001795 /* methods */
1796 (destructor)treebuilder_dealloc, /* tp_dealloc */
1797 0, /* tp_print */
1798 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1799};
1800
1801/* ==================================================================== */
1802/* the expat interface */
1803
1804#if defined(USE_EXPAT)
1805
1806#include "expat.h"
1807
1808#if defined(USE_PYEXPAT_CAPI)
1809#include "pyexpat.h"
1810static struct PyExpat_CAPI* expat_capi;
1811#define EXPAT(func) (expat_capi->func)
1812#else
1813#define EXPAT(func) (XML_##func)
1814#endif
1815
1816typedef struct {
1817 PyObject_HEAD
1818
1819 XML_Parser parser;
1820
1821 PyObject* target;
1822 PyObject* entity;
1823
1824 PyObject* names;
1825
1826 PyObject* handle_xml;
1827 PyObject* handle_start;
1828 PyObject* handle_data;
1829 PyObject* handle_end;
1830
1831 PyObject* handle_comment;
1832 PyObject* handle_pi;
1833
1834} XMLParserObject;
1835
Neal Norwitz227b5332006-03-22 09:28:35 +00001836static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001837
1838/* helpers */
1839
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001840LOCAL(PyObject*)
1841makeuniversal(XMLParserObject* self, const char* string)
1842{
1843 /* convert a UTF-8 tag/attribute name from the expat parser
1844 to a universal name string */
1845
1846 int size = strlen(string);
1847 PyObject* key;
1848 PyObject* value;
1849
1850 /* look the 'raw' name up in the names dictionary */
1851 key = PyString_FromStringAndSize(string, size);
1852 if (!key)
1853 return NULL;
1854
1855 value = PyDict_GetItem(self->names, key);
1856
1857 if (value) {
1858 Py_INCREF(value);
1859 } else {
1860 /* new name. convert to universal name, and decode as
1861 necessary */
1862
1863 PyObject* tag;
1864 char* p;
1865 int i;
1866
1867 /* look for namespace separator */
1868 for (i = 0; i < size; i++)
1869 if (string[i] == '}')
1870 break;
1871 if (i != size) {
1872 /* convert to universal name */
1873 tag = PyString_FromStringAndSize(NULL, size+1);
1874 p = PyString_AS_STRING(tag);
1875 p[0] = '{';
1876 memcpy(p+1, string, size);
1877 size++;
1878 } else {
1879 /* plain name; use key as tag */
1880 Py_INCREF(key);
1881 tag = key;
1882 }
1883
1884 /* decode universal name */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001885 p = PyString_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00001886 value = PyUnicode_DecodeUTF8(p, size, "strict");
1887 Py_DECREF(tag);
1888 if (!value) {
1889 Py_DECREF(key);
1890 return NULL;
1891 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001892
1893 /* add to names dictionary */
1894 if (PyDict_SetItem(self->names, key, value) < 0) {
1895 Py_DECREF(key);
1896 Py_DECREF(value);
1897 return NULL;
1898 }
1899 }
1900
1901 Py_DECREF(key);
1902 return value;
1903}
1904
1905/* -------------------------------------------------------------------- */
1906/* handlers */
1907
1908static void
1909expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1910 int data_len)
1911{
1912 PyObject* key;
1913 PyObject* value;
1914 PyObject* res;
1915
1916 if (data_len < 2 || data_in[0] != '&')
1917 return;
1918
Neal Norwitz0269b912007-08-08 06:56:02 +00001919 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001920 if (!key)
1921 return;
1922
1923 value = PyDict_GetItem(self->entity, key);
1924
1925 if (value) {
1926 if (TreeBuilder_CheckExact(self->target))
1927 res = treebuilder_handle_data(
1928 (TreeBuilderObject*) self->target, value
1929 );
1930 else if (self->handle_data)
1931 res = PyObject_CallFunction(self->handle_data, "O", value);
1932 else
1933 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001934 Py_XDECREF(res);
1935 } else {
1936 PyErr_Format(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001937 PyExc_SyntaxError, "undefined entity &%s;: line %ld, column %ld",
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001938 PyString_AS_STRING(key),
1939 EXPAT(GetErrorLineNumber)(self->parser),
1940 EXPAT(GetErrorColumnNumber)(self->parser)
1941 );
1942 }
1943
1944 Py_DECREF(key);
1945}
1946
1947static void
1948expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
1949 const XML_Char **attrib_in)
1950{
1951 PyObject* res;
1952 PyObject* tag;
1953 PyObject* attrib;
1954 int ok;
1955
1956 /* tag name */
1957 tag = makeuniversal(self, tag_in);
1958 if (!tag)
1959 return; /* parser will look for errors */
1960
1961 /* attributes */
1962 if (attrib_in[0]) {
1963 attrib = PyDict_New();
1964 if (!attrib)
1965 return;
1966 while (attrib_in[0] && attrib_in[1]) {
1967 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00001968 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001969 if (!key || !value) {
1970 Py_XDECREF(value);
1971 Py_XDECREF(key);
1972 Py_DECREF(attrib);
1973 return;
1974 }
1975 ok = PyDict_SetItem(attrib, key, value);
1976 Py_DECREF(value);
1977 Py_DECREF(key);
1978 if (ok < 0) {
1979 Py_DECREF(attrib);
1980 return;
1981 }
1982 attrib_in += 2;
1983 }
1984 } else {
1985 Py_INCREF(Py_None);
1986 attrib = Py_None;
1987 }
1988
1989 if (TreeBuilder_CheckExact(self->target))
1990 /* shortcut */
1991 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
1992 tag, attrib);
1993 else if (self->handle_start)
1994 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
1995 else
1996 res = NULL;
1997
1998 Py_DECREF(tag);
1999 Py_DECREF(attrib);
2000
2001 Py_XDECREF(res);
2002}
2003
2004static void
2005expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2006 int data_len)
2007{
2008 PyObject* data;
2009 PyObject* res;
2010
Neal Norwitz0269b912007-08-08 06:56:02 +00002011 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002012 if (!data)
2013 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002014
2015 if (TreeBuilder_CheckExact(self->target))
2016 /* shortcut */
2017 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2018 else if (self->handle_data)
2019 res = PyObject_CallFunction(self->handle_data, "O", data);
2020 else
2021 res = NULL;
2022
2023 Py_DECREF(data);
2024
2025 Py_XDECREF(res);
2026}
2027
2028static void
2029expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2030{
2031 PyObject* tag;
2032 PyObject* res = NULL;
2033
2034 if (TreeBuilder_CheckExact(self->target))
2035 /* shortcut */
2036 /* the standard tree builder doesn't look at the end tag */
2037 res = treebuilder_handle_end(
2038 (TreeBuilderObject*) self->target, Py_None
2039 );
2040 else if (self->handle_end) {
2041 tag = makeuniversal(self, tag_in);
2042 if (tag) {
2043 res = PyObject_CallFunction(self->handle_end, "O", tag);
2044 Py_DECREF(tag);
2045 }
2046 }
2047
2048 Py_XDECREF(res);
2049}
2050
2051static void
2052expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2053 const XML_Char *uri)
2054{
2055 treebuilder_handle_namespace(
2056 (TreeBuilderObject*) self->target, 1, prefix, uri
2057 );
2058}
2059
2060static void
2061expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2062{
2063 treebuilder_handle_namespace(
2064 (TreeBuilderObject*) self->target, 0, NULL, NULL
2065 );
2066}
2067
2068static void
2069expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2070{
2071 PyObject* comment;
2072 PyObject* res;
2073
2074 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002075 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002076 if (comment) {
2077 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2078 Py_XDECREF(res);
2079 Py_DECREF(comment);
2080 }
2081 }
2082}
2083
2084static void
2085expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2086 const XML_Char* data_in)
2087{
2088 PyObject* target;
2089 PyObject* data;
2090 PyObject* res;
2091
2092 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002093 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2094 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002095 if (target && data) {
2096 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2097 Py_XDECREF(res);
2098 Py_DECREF(data);
2099 Py_DECREF(target);
2100 } else {
2101 Py_XDECREF(data);
2102 Py_XDECREF(target);
2103 }
2104 }
2105}
2106
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002107static int
2108expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2109 XML_Encoding *info)
2110{
2111 PyObject* u;
2112 Py_UNICODE* p;
2113 unsigned char s[256];
2114 int i;
2115
2116 memset(info, 0, sizeof(XML_Encoding));
2117
2118 for (i = 0; i < 256; i++)
2119 s[i] = i;
2120
Fredrik Lundhc3389992005-12-25 11:40:19 +00002121 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002122 if (!u)
2123 return XML_STATUS_ERROR;
2124
2125 if (PyUnicode_GET_SIZE(u) != 256) {
2126 Py_DECREF(u);
2127 return XML_STATUS_ERROR;
2128 }
2129
2130 p = PyUnicode_AS_UNICODE(u);
2131
2132 for (i = 0; i < 256; i++) {
2133 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2134 info->map[i] = p[i];
2135 else
2136 info->map[i] = -1;
2137 }
2138
2139 Py_DECREF(u);
2140
2141 return XML_STATUS_OK;
2142}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002143
2144/* -------------------------------------------------------------------- */
2145/* constructor and destructor */
2146
2147static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002148xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002149{
2150 XMLParserObject* self;
2151 /* FIXME: does this need to be static? */
2152 static XML_Memory_Handling_Suite memory_handler;
2153
2154 PyObject* target = NULL;
2155 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002156 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002157 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2158 &target, &encoding))
2159 return NULL;
2160
2161#if defined(USE_PYEXPAT_CAPI)
2162 if (!expat_capi) {
2163 PyErr_SetString(
2164 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2165 );
2166 return NULL;
2167 }
2168#endif
2169
2170 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2171 if (self == NULL)
2172 return NULL;
2173
2174 self->entity = PyDict_New();
2175 if (!self->entity) {
2176 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002177 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002178 }
2179
2180 self->names = PyDict_New();
2181 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002182 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002183 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002184 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002185 }
2186
2187 memory_handler.malloc_fcn = PyObject_Malloc;
2188 memory_handler.realloc_fcn = PyObject_Realloc;
2189 memory_handler.free_fcn = PyObject_Free;
2190
2191 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2192 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002193 PyObject_Del(self->names);
2194 PyObject_Del(self->entity);
2195 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002196 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002197 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002198 }
2199
2200 /* setup target handlers */
2201 if (!target) {
2202 target = treebuilder_new();
2203 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002204 EXPAT(ParserFree)(self->parser);
2205 PyObject_Del(self->names);
2206 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002207 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002208 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002209 }
2210 } else
2211 Py_INCREF(target);
2212 self->target = target;
2213
2214 self->handle_xml = PyObject_GetAttrString(target, "xml");
2215 self->handle_start = PyObject_GetAttrString(target, "start");
2216 self->handle_data = PyObject_GetAttrString(target, "data");
2217 self->handle_end = PyObject_GetAttrString(target, "end");
2218 self->handle_comment = PyObject_GetAttrString(target, "comment");
2219 self->handle_pi = PyObject_GetAttrString(target, "pi");
2220
2221 PyErr_Clear();
2222
2223 /* configure parser */
2224 EXPAT(SetUserData)(self->parser, self);
2225 EXPAT(SetElementHandler)(
2226 self->parser,
2227 (XML_StartElementHandler) expat_start_handler,
2228 (XML_EndElementHandler) expat_end_handler
2229 );
2230 EXPAT(SetDefaultHandlerExpand)(
2231 self->parser,
2232 (XML_DefaultHandler) expat_default_handler
2233 );
2234 EXPAT(SetCharacterDataHandler)(
2235 self->parser,
2236 (XML_CharacterDataHandler) expat_data_handler
2237 );
2238 if (self->handle_comment)
2239 EXPAT(SetCommentHandler)(
2240 self->parser,
2241 (XML_CommentHandler) expat_comment_handler
2242 );
2243 if (self->handle_pi)
2244 EXPAT(SetProcessingInstructionHandler)(
2245 self->parser,
2246 (XML_ProcessingInstructionHandler) expat_pi_handler
2247 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002248 EXPAT(SetUnknownEncodingHandler)(
2249 self->parser,
2250 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2251 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002252
2253 ALLOC(sizeof(XMLParserObject), "create expatparser");
2254
2255 return (PyObject*) self;
2256}
2257
2258static void
2259xmlparser_dealloc(XMLParserObject* self)
2260{
2261 EXPAT(ParserFree)(self->parser);
2262
2263 Py_XDECREF(self->handle_pi);
2264 Py_XDECREF(self->handle_comment);
2265 Py_XDECREF(self->handle_end);
2266 Py_XDECREF(self->handle_data);
2267 Py_XDECREF(self->handle_start);
2268 Py_XDECREF(self->handle_xml);
2269
2270 Py_DECREF(self->target);
2271 Py_DECREF(self->entity);
2272 Py_DECREF(self->names);
2273
2274 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2275
2276 PyObject_Del(self);
2277}
2278
2279/* -------------------------------------------------------------------- */
2280/* methods (in alphabetical order) */
2281
2282LOCAL(PyObject*)
2283expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2284{
2285 int ok;
2286
2287 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2288
2289 if (PyErr_Occurred())
2290 return NULL;
2291
2292 if (!ok) {
2293 PyErr_Format(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002294 PyExc_SyntaxError, "%s: line %ld, column %ld",
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002295 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2296 EXPAT(GetErrorLineNumber)(self->parser),
2297 EXPAT(GetErrorColumnNumber)(self->parser)
2298 );
2299 return NULL;
2300 }
2301
2302 Py_RETURN_NONE;
2303}
2304
2305static PyObject*
2306xmlparser_close(XMLParserObject* self, PyObject* args)
2307{
2308 /* end feeding data to parser */
2309
2310 PyObject* res;
2311 if (!PyArg_ParseTuple(args, ":close"))
2312 return NULL;
2313
2314 res = expat_parse(self, "", 0, 1);
2315
2316 if (res && TreeBuilder_CheckExact(self->target)) {
2317 Py_DECREF(res);
2318 return treebuilder_done((TreeBuilderObject*) self->target);
2319 }
2320
2321 return res;
2322}
2323
2324static PyObject*
2325xmlparser_feed(XMLParserObject* self, PyObject* args)
2326{
2327 /* feed data to parser */
2328
2329 char* data;
2330 int data_len;
2331 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2332 return NULL;
2333
2334 return expat_parse(self, data, data_len, 0);
2335}
2336
2337static PyObject*
2338xmlparser_parse(XMLParserObject* self, PyObject* args)
2339{
2340 /* (internal) parse until end of input stream */
2341
2342 PyObject* reader;
2343 PyObject* buffer;
2344 PyObject* res;
2345
2346 PyObject* fileobj;
2347 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2348 return NULL;
2349
2350 reader = PyObject_GetAttrString(fileobj, "read");
2351 if (!reader)
2352 return NULL;
2353
2354 /* read from open file object */
2355 for (;;) {
2356
2357 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2358
2359 if (!buffer) {
2360 /* read failed (e.g. due to KeyboardInterrupt) */
2361 Py_DECREF(reader);
2362 return NULL;
2363 }
2364
2365 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2366 Py_DECREF(buffer);
2367 break;
2368 }
2369
2370 res = expat_parse(
2371 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2372 );
2373
2374 Py_DECREF(buffer);
2375
2376 if (!res) {
2377 Py_DECREF(reader);
2378 return NULL;
2379 }
2380 Py_DECREF(res);
2381
2382 }
2383
2384 Py_DECREF(reader);
2385
2386 res = expat_parse(self, "", 0, 1);
2387
2388 if (res && TreeBuilder_CheckExact(self->target)) {
2389 Py_DECREF(res);
2390 return treebuilder_done((TreeBuilderObject*) self->target);
2391 }
2392
2393 return res;
2394}
2395
2396static PyObject*
2397xmlparser_setevents(XMLParserObject* self, PyObject* args)
2398{
2399 /* activate element event reporting */
2400
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002401 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002402 TreeBuilderObject* target;
2403
2404 PyObject* events; /* event collector */
2405 PyObject* event_set = Py_None;
2406 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2407 &event_set))
2408 return NULL;
2409
2410 if (!TreeBuilder_CheckExact(self->target)) {
2411 PyErr_SetString(
2412 PyExc_TypeError,
2413 "event handling only supported for cElementTree.Treebuilder "
2414 "targets"
2415 );
2416 return NULL;
2417 }
2418
2419 target = (TreeBuilderObject*) self->target;
2420
2421 Py_INCREF(events);
2422 Py_XDECREF(target->events);
2423 target->events = events;
2424
2425 /* clear out existing events */
2426 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2427 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2428 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2429 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2430
2431 if (event_set == Py_None) {
2432 /* default is "end" only */
2433 target->end_event_obj = PyString_FromString("end");
2434 Py_RETURN_NONE;
2435 }
2436
2437 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2438 goto error;
2439
2440 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2441 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2442 char* event;
2443 if (!PyString_Check(item))
2444 goto error;
2445 event = PyString_AS_STRING(item);
2446 if (strcmp(event, "start") == 0) {
2447 Py_INCREF(item);
2448 target->start_event_obj = item;
2449 } else if (strcmp(event, "end") == 0) {
2450 Py_INCREF(item);
2451 Py_XDECREF(target->end_event_obj);
2452 target->end_event_obj = item;
2453 } else if (strcmp(event, "start-ns") == 0) {
2454 Py_INCREF(item);
2455 Py_XDECREF(target->start_ns_event_obj);
2456 target->start_ns_event_obj = item;
2457 EXPAT(SetNamespaceDeclHandler)(
2458 self->parser,
2459 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2460 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2461 );
2462 } else if (strcmp(event, "end-ns") == 0) {
2463 Py_INCREF(item);
2464 Py_XDECREF(target->end_ns_event_obj);
2465 target->end_ns_event_obj = item;
2466 EXPAT(SetNamespaceDeclHandler)(
2467 self->parser,
2468 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2469 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2470 );
2471 } else {
2472 PyErr_Format(
2473 PyExc_ValueError,
2474 "unknown event '%s'", event
2475 );
2476 return NULL;
2477 }
2478 }
2479
2480 Py_RETURN_NONE;
2481
2482 error:
2483 PyErr_SetString(
2484 PyExc_TypeError,
2485 "invalid event tuple"
2486 );
2487 return NULL;
2488}
2489
2490static PyMethodDef xmlparser_methods[] = {
2491 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2492 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2493 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2494 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2495 {NULL, NULL}
2496};
2497
2498static PyObject*
2499xmlparser_getattr(XMLParserObject* self, char* name)
2500{
2501 PyObject* res;
2502
2503 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2504 if (res)
2505 return res;
2506
2507 PyErr_Clear();
2508
2509 if (strcmp(name, "entity") == 0)
2510 res = self->entity;
2511 else if (strcmp(name, "target") == 0)
2512 res = self->target;
2513 else if (strcmp(name, "version") == 0) {
2514 char buffer[100];
2515 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2516 XML_MINOR_VERSION, XML_MICRO_VERSION);
2517 return PyString_FromString(buffer);
2518 } else {
2519 PyErr_SetString(PyExc_AttributeError, name);
2520 return NULL;
2521 }
2522
2523 Py_INCREF(res);
2524 return res;
2525}
2526
Neal Norwitz227b5332006-03-22 09:28:35 +00002527static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002528 PyVarObject_HEAD_INIT(NULL, 0)
2529 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002530 /* methods */
2531 (destructor)xmlparser_dealloc, /* tp_dealloc */
2532 0, /* tp_print */
2533 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2534};
2535
2536#endif
2537
2538/* ==================================================================== */
2539/* python module interface */
2540
2541static PyMethodDef _functions[] = {
2542 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2543 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2544 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2545#if defined(USE_EXPAT)
2546 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2547 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2548#endif
2549 {NULL, NULL}
2550};
2551
Neal Norwitzf6657e62006-12-28 04:47:50 +00002552PyMODINIT_FUNC
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002553init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002554{
2555 PyObject* m;
2556 PyObject* g;
2557 char* bootstrap;
2558#if defined(USE_PYEXPAT_CAPI)
2559 struct PyExpat_CAPI* capi;
2560#endif
2561
2562 /* Patch object type */
Christian Heimes90aa7642007-12-19 02:45:37 +00002563 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002564#if defined(USE_EXPAT)
Christian Heimes90aa7642007-12-19 02:45:37 +00002565 Py_TYPE(&XMLParser_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002566#endif
2567
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002568 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002569 if (!m)
2570 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002571
2572 /* python glue code */
2573
2574 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002575 if (!g)
2576 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002577
2578 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2579
2580 bootstrap = (
2581
2582#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2583 "from __future__ import generators\n" /* enable yield under 2.2 */
2584#endif
2585
2586 "from copy import copy, deepcopy\n"
2587
2588 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002589 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002590 "except ImportError:\n"
2591 " import ElementTree\n"
2592 "ET = ElementTree\n"
2593 "del ElementTree\n"
2594
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002595 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002596
2597 "try:\n" /* check if copy works as is */
2598 " copy(cElementTree.Element('x'))\n"
2599 "except:\n"
2600 " def copyelement(elem):\n"
2601 " return elem\n"
2602
2603 "def Comment(text=None):\n" /* public */
2604 " element = cElementTree.Element(ET.Comment)\n"
2605 " element.text = text\n"
2606 " return element\n"
2607 "cElementTree.Comment = Comment\n"
2608
2609 "class ElementTree(ET.ElementTree):\n" /* public */
2610 " def parse(self, source, parser=None):\n"
2611 " if not hasattr(source, 'read'):\n"
2612 " source = open(source, 'rb')\n"
2613 " if parser is not None:\n"
2614 " while 1:\n"
2615 " data = source.read(65536)\n"
2616 " if not data:\n"
2617 " break\n"
2618 " parser.feed(data)\n"
2619 " self._root = parser.close()\n"
2620 " else:\n"
2621 " parser = cElementTree.XMLParser()\n"
2622 " self._root = parser._parse(source)\n"
2623 " return self._root\n"
2624 "cElementTree.ElementTree = ElementTree\n"
2625
2626 "def getiterator(node, tag=None):\n" /* helper */
2627 " if tag == '*':\n"
2628 " tag = None\n"
2629#if (PY_VERSION_HEX < 0x02020000)
2630 " nodes = []\n" /* 2.1 doesn't have yield */
2631 " if tag is None or node.tag == tag:\n"
2632 " nodes.append(node)\n"
2633 " for node in node:\n"
2634 " nodes.extend(getiterator(node, tag))\n"
2635 " return nodes\n"
2636#else
2637 " if tag is None or node.tag == tag:\n"
2638 " yield node\n"
2639 " for node in node:\n"
2640 " for node in getiterator(node, tag):\n"
2641 " yield node\n"
2642#endif
2643
2644 "def parse(source, parser=None):\n" /* public */
2645 " tree = ElementTree()\n"
2646 " tree.parse(source, parser)\n"
2647 " return tree\n"
2648 "cElementTree.parse = parse\n"
2649
2650#if (PY_VERSION_HEX < 0x02020000)
2651 "if hasattr(ET, 'iterparse'):\n"
2652 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2653#else
2654 "class iterparse(object):\n"
2655 " root = None\n"
2656 " def __init__(self, file, events=None):\n"
2657 " if not hasattr(file, 'read'):\n"
2658 " file = open(file, 'rb')\n"
2659 " self._file = file\n"
2660 " self._events = events\n"
2661 " def __iter__(self):\n"
2662 " events = []\n"
2663 " b = cElementTree.TreeBuilder()\n"
2664 " p = cElementTree.XMLParser(b)\n"
2665 " p._setevents(events, self._events)\n"
2666 " while 1:\n"
2667 " data = self._file.read(16384)\n"
2668 " if not data:\n"
2669 " break\n"
2670 " p.feed(data)\n"
2671 " for event in events:\n"
2672 " yield event\n"
2673 " del events[:]\n"
2674 " root = p.close()\n"
2675 " for event in events:\n"
2676 " yield event\n"
2677 " self.root = root\n"
2678 "cElementTree.iterparse = iterparse\n"
2679#endif
2680
2681 "def PI(target, text=None):\n" /* public */
2682 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2683 " element.text = target\n"
2684 " if text:\n"
2685 " element.text = element.text + ' ' + text\n"
2686 " return element\n"
2687
2688 " elem = cElementTree.Element(ET.PI)\n"
2689 " elem.text = text\n"
2690 " return elem\n"
2691 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2692
2693 "def XML(text):\n" /* public */
2694 " parser = cElementTree.XMLParser()\n"
2695 " parser.feed(text)\n"
2696 " return parser.close()\n"
2697 "cElementTree.XML = cElementTree.fromstring = XML\n"
2698
2699 "def XMLID(text):\n" /* public */
2700 " tree = XML(text)\n"
2701 " ids = {}\n"
2702 " for elem in tree.getiterator():\n"
2703 " id = elem.get('id')\n"
2704 " if id:\n"
2705 " ids[id] = elem\n"
2706 " return tree, ids\n"
2707 "cElementTree.XMLID = XMLID\n"
2708
2709 "cElementTree.dump = ET.dump\n"
2710 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2711 "cElementTree.iselement = ET.iselement\n"
2712 "cElementTree.QName = ET.QName\n"
2713 "cElementTree.tostring = ET.tostring\n"
2714 "cElementTree.VERSION = '" VERSION "'\n"
2715 "cElementTree.__version__ = '" VERSION "'\n"
2716 "cElementTree.XMLParserError = SyntaxError\n"
2717
2718 );
2719
2720 PyRun_String(bootstrap, Py_file_input, g, NULL);
2721
2722 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2723
2724 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2725 if (elementtree_copyelement_obj) {
2726 /* reduce hack needed; enable reduce method */
2727 PyMethodDef* mp;
2728 for (mp = element_methods; mp->ml_name; mp++)
2729 if (mp->ml_meth == (PyCFunction) element_reduce) {
2730 mp->ml_name = "__reduce__";
2731 break;
2732 }
2733 } else
2734 PyErr_Clear();
2735 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2736 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2737
2738#if defined(USE_PYEXPAT_CAPI)
2739 /* link against pyexpat, if possible */
2740 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2741 if (capi &&
2742 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2743 capi->size <= sizeof(*expat_capi) &&
2744 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2745 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2746 capi->MICRO_VERSION == XML_MICRO_VERSION)
2747 expat_capi = capi;
2748 else
2749 expat_capi = NULL;
2750#endif
2751
2752}