blob: da223c4ee09f7ae1cf0456aa226849d65d6d604a [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000036 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000038 *
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000039 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000041 *
42 * info@pythonware.com
43 * http://www.pythonware.com
44 */
45
Fredrik Lundh6d52b552005-12-16 22:06:43 +000046/* Licensed to PSF under a Contributor Agreement. */
47/* See http://www.python.org/2.4/license for licensing details. */
48
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000049#include "Python.h"
50
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000051#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
53/* -------------------------------------------------------------------- */
54/* configuration */
55
56/* Leave defined to include the expat-based XMLParser type */
57#define USE_EXPAT
58
/* Define to route all expat calls via pyexpat's embedded expat library */
60/* #define USE_PYEXPAT_CAPI */
61
62/* An element can hold this many children without extra memory
63 allocations. */
64#define STATIC_CHILDREN 4
65
/* For best performance, choose a value so that 80-90% of all nodes
67 have no more than the given number of children. Set this to zero
68 to minimize the size of the element structure itself (this only
69 helps if you have lots of leaf nodes with attributes). */
70
71/* Also note that pymalloc always allocates blocks in multiples of
72 eight bytes. For the current version of cElementTree, this means
73 that the number of children should be an even number, at least on
74 32-bit platforms. */
75
76/* -------------------------------------------------------------------- */
77
78#if 0
79static int memory = 0;
80#define ALLOC(size, comment)\
81do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
82#define RELEASE(size, comment)\
83do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
84#else
85#define ALLOC(size, comment)
86#define RELEASE(size, comment)
87#endif
88
89/* compiler tweaks */
90#if defined(_MSC_VER)
91#define LOCAL(type) static __inline type __fastcall
92#else
93#define LOCAL(type) static type
94#endif
95
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000096/* macros used to store 'join' flags in string object pointers. note
97 that all use of text and tail as object pointers must be wrapped in
98 JOIN_OBJ. see comments in the ElementObject definition for more
99 info. */
100#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
101#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
102#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
103
104/* glue functions (see the init function for details) */
105static PyObject* elementtree_copyelement_obj;
106static PyObject* elementtree_deepcopy_obj;
107static PyObject* elementtree_getiterator_obj;
108static PyObject* elementpath_obj;
109
110/* helpers */
111
112LOCAL(PyObject*)
113deepcopy(PyObject* object, PyObject* memo)
114{
115 /* do a deep copy of the given object */
116
117 PyObject* args;
118 PyObject* result;
119
120 if (!elementtree_deepcopy_obj) {
121 PyErr_SetString(
122 PyExc_RuntimeError,
123 "deepcopy helper not found"
124 );
125 return NULL;
126 }
127
128 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000129 if (!args)
130 return NULL;
131
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
133 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
134
135 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
136
137 Py_DECREF(args);
138
139 return result;
140}
141
142LOCAL(PyObject*)
143list_join(PyObject* list)
144{
145 /* join list elements (destroying the list in the process) */
146
147 PyObject* joiner;
148 PyObject* function;
149 PyObject* args;
150 PyObject* result;
151
152 switch (PyList_GET_SIZE(list)) {
153 case 0:
154 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000155 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000156 case 1:
157 result = PyList_GET_ITEM(list, 0);
158 Py_INCREF(result);
159 Py_DECREF(list);
160 return result;
161 }
162
163 /* two or more elements: slice out a suitable separator from the
164 first member, and use that to join the entire list */
165
166 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
167 if (!joiner)
168 return NULL;
169
170 function = PyObject_GetAttrString(joiner, "join");
171 if (!function) {
172 Py_DECREF(joiner);
173 return NULL;
174 }
175
176 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000177 if (!args)
178 return NULL;
179
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000180 PyTuple_SET_ITEM(args, 0, list);
181
182 result = PyObject_CallObject(function, args);
183
184 Py_DECREF(args); /* also removes list */
185 Py_DECREF(function);
186 Py_DECREF(joiner);
187
188 return result;
189}
190
191#if (PY_VERSION_HEX < 0x02020000)
192LOCAL(int)
193PyDict_Update(PyObject* dict, PyObject* other)
194{
195 /* PyDict_Update emulation for 2.1 and earlier */
196
197 PyObject* res;
198
199 res = PyObject_CallMethod(dict, "update", "O", other);
200 if (!res)
201 return -1;
202
203 Py_DECREF(res);
204 return 0;
205}
206#endif
207
208/* -------------------------------------------------------------------- */
209/* the element type */
210
211typedef struct {
212
213 /* attributes (a dictionary object), or None if no attributes */
214 PyObject* attrib;
215
216 /* child elements */
217 int length; /* actual number of items */
218 int allocated; /* allocated items */
219
220 /* this either points to _children or to a malloced buffer */
221 PyObject* *children;
222
223 PyObject* _children[STATIC_CHILDREN];
224
225} ElementObjectExtra;
226
227typedef struct {
228 PyObject_HEAD
229
230 /* element tag (a string). */
231 PyObject* tag;
232
233 /* text before first child. note that this is a tagged pointer;
234 use JOIN_OBJ to get the object pointer. the join flag is used
235 to distinguish lists created by the tree builder from lists
236 assigned to the attribute by application code; the former
237 should be joined before being returned to the user, the latter
238 should be left intact. */
239 PyObject* text;
240
241 /* text after this element, in parent. note that this is a tagged
242 pointer; use JOIN_OBJ to get the object pointer. */
243 PyObject* tail;
244
245 ElementObjectExtra* extra;
246
247} ElementObject;
248
Neal Norwitz227b5332006-03-22 09:28:35 +0000249static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000250
Christian Heimes90aa7642007-12-19 02:45:37 +0000251#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000252
253/* -------------------------------------------------------------------- */
254/* element constructor and destructor */
255
256LOCAL(int)
257element_new_extra(ElementObject* self, PyObject* attrib)
258{
259 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
260 if (!self->extra)
261 return -1;
262
263 if (!attrib)
264 attrib = Py_None;
265
266 Py_INCREF(attrib);
267 self->extra->attrib = attrib;
268
269 self->extra->length = 0;
270 self->extra->allocated = STATIC_CHILDREN;
271 self->extra->children = self->extra->_children;
272
273 return 0;
274}
275
276LOCAL(void)
277element_dealloc_extra(ElementObject* self)
278{
279 int i;
280
281 Py_DECREF(self->extra->attrib);
282
283 for (i = 0; i < self->extra->length; i++)
284 Py_DECREF(self->extra->children[i]);
285
286 if (self->extra->children != self->extra->_children)
287 PyObject_Free(self->extra->children);
288
289 PyObject_Free(self->extra);
290}
291
292LOCAL(PyObject*)
293element_new(PyObject* tag, PyObject* attrib)
294{
295 ElementObject* self;
296
297 self = PyObject_New(ElementObject, &Element_Type);
298 if (self == NULL)
299 return NULL;
300
301 /* use None for empty dictionaries */
302 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
303 attrib = Py_None;
304
305 self->extra = NULL;
306
307 if (attrib != Py_None) {
308
Thomas Wouters477c8d52006-05-27 19:21:47 +0000309 if (element_new_extra(self, attrib) < 0) {
310 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000312 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000313
314 self->extra->length = 0;
315 self->extra->allocated = STATIC_CHILDREN;
316 self->extra->children = self->extra->_children;
317
318 }
319
320 Py_INCREF(tag);
321 self->tag = tag;
322
323 Py_INCREF(Py_None);
324 self->text = Py_None;
325
326 Py_INCREF(Py_None);
327 self->tail = Py_None;
328
329 ALLOC(sizeof(ElementObject), "create element");
330
331 return (PyObject*) self;
332}
333
334LOCAL(int)
335element_resize(ElementObject* self, int extra)
336{
337 int size;
338 PyObject* *children;
339
340 /* make sure self->children can hold the given number of extra
341 elements. set an exception and return -1 if allocation failed */
342
343 if (!self->extra)
344 element_new_extra(self, NULL);
345
346 size = self->extra->length + extra;
347
348 if (size > self->extra->allocated) {
349 /* use Python 2.4's list growth strategy */
350 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000351 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
352 * which needs at least 4 bytes.
353 * Although it's a false alarm always assume at least one child to
354 * be safe.
355 */
356 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000357 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000358 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
359 * "children", which needs at least 4 bytes. Although it's a
360 * false alarm always assume at least one child to be safe.
361 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000362 children = PyObject_Realloc(self->extra->children,
363 size * sizeof(PyObject*));
364 if (!children)
365 goto nomemory;
366 } else {
367 children = PyObject_Malloc(size * sizeof(PyObject*));
368 if (!children)
369 goto nomemory;
370 /* copy existing children from static area to malloc buffer */
371 memcpy(children, self->extra->children,
372 self->extra->length * sizeof(PyObject*));
373 }
374 self->extra->children = children;
375 self->extra->allocated = size;
376 }
377
378 return 0;
379
380 nomemory:
381 PyErr_NoMemory();
382 return -1;
383}
384
385LOCAL(int)
386element_add_subelement(ElementObject* self, PyObject* element)
387{
388 /* add a child element to a parent */
389
390 if (element_resize(self, 1) < 0)
391 return -1;
392
393 Py_INCREF(element);
394 self->extra->children[self->extra->length] = element;
395
396 self->extra->length++;
397
398 return 0;
399}
400
401LOCAL(PyObject*)
402element_get_attrib(ElementObject* self)
403{
404 /* return borrowed reference to attrib dictionary */
405 /* note: this function assumes that the extra section exists */
406
407 PyObject* res = self->extra->attrib;
408
409 if (res == Py_None) {
410 /* create missing dictionary */
411 res = PyDict_New();
412 if (!res)
413 return NULL;
414 self->extra->attrib = res;
415 }
416
417 return res;
418}
419
420LOCAL(PyObject*)
421element_get_text(ElementObject* self)
422{
423 /* return borrowed reference to text attribute */
424
425 PyObject* res = self->text;
426
427 if (JOIN_GET(res)) {
428 res = JOIN_OBJ(res);
429 if (PyList_CheckExact(res)) {
430 res = list_join(res);
431 if (!res)
432 return NULL;
433 self->text = res;
434 }
435 }
436
437 return res;
438}
439
440LOCAL(PyObject*)
441element_get_tail(ElementObject* self)
442{
443 /* return borrowed reference to text attribute */
444
445 PyObject* res = self->tail;
446
447 if (JOIN_GET(res)) {
448 res = JOIN_OBJ(res);
449 if (PyList_CheckExact(res)) {
450 res = list_join(res);
451 if (!res)
452 return NULL;
453 self->tail = res;
454 }
455 }
456
457 return res;
458}
459
460static PyObject*
461element(PyObject* self, PyObject* args, PyObject* kw)
462{
463 PyObject* elem;
464
465 PyObject* tag;
466 PyObject* attrib = NULL;
467 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
468 &PyDict_Type, &attrib))
469 return NULL;
470
471 if (attrib || kw) {
472 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
473 if (!attrib)
474 return NULL;
475 if (kw)
476 PyDict_Update(attrib, kw);
477 } else {
478 Py_INCREF(Py_None);
479 attrib = Py_None;
480 }
481
482 elem = element_new(tag, attrib);
483
484 Py_DECREF(attrib);
485
486 return elem;
487}
488
489static PyObject*
490subelement(PyObject* self, PyObject* args, PyObject* kw)
491{
492 PyObject* elem;
493
494 ElementObject* parent;
495 PyObject* tag;
496 PyObject* attrib = NULL;
497 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
498 &Element_Type, &parent, &tag,
499 &PyDict_Type, &attrib))
500 return NULL;
501
502 if (attrib || kw) {
503 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
504 if (!attrib)
505 return NULL;
506 if (kw)
507 PyDict_Update(attrib, kw);
508 } else {
509 Py_INCREF(Py_None);
510 attrib = Py_None;
511 }
512
513 elem = element_new(tag, attrib);
514
515 Py_DECREF(attrib);
516
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000517 if (element_add_subelement(parent, elem) < 0) {
518 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000519 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000520 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000521
522 return elem;
523}
524
525static void
526element_dealloc(ElementObject* self)
527{
528 if (self->extra)
529 element_dealloc_extra(self);
530
531 /* discard attributes */
532 Py_DECREF(self->tag);
533 Py_DECREF(JOIN_OBJ(self->text));
534 Py_DECREF(JOIN_OBJ(self->tail));
535
536 RELEASE(sizeof(ElementObject), "destroy element");
537
538 PyObject_Del(self);
539}
540
541/* -------------------------------------------------------------------- */
542/* methods (in alphabetical order) */
543
544static PyObject*
545element_append(ElementObject* self, PyObject* args)
546{
547 PyObject* element;
548 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
549 return NULL;
550
551 if (element_add_subelement(self, element) < 0)
552 return NULL;
553
554 Py_RETURN_NONE;
555}
556
557static PyObject*
558element_clear(ElementObject* self, PyObject* args)
559{
560 if (!PyArg_ParseTuple(args, ":clear"))
561 return NULL;
562
563 if (self->extra) {
564 element_dealloc_extra(self);
565 self->extra = NULL;
566 }
567
568 Py_INCREF(Py_None);
569 Py_DECREF(JOIN_OBJ(self->text));
570 self->text = Py_None;
571
572 Py_INCREF(Py_None);
573 Py_DECREF(JOIN_OBJ(self->tail));
574 self->tail = Py_None;
575
576 Py_RETURN_NONE;
577}
578
579static PyObject*
580element_copy(ElementObject* self, PyObject* args)
581{
582 int i;
583 ElementObject* element;
584
585 if (!PyArg_ParseTuple(args, ":__copy__"))
586 return NULL;
587
588 element = (ElementObject*) element_new(
589 self->tag, (self->extra) ? self->extra->attrib : Py_None
590 );
591 if (!element)
592 return NULL;
593
594 Py_DECREF(JOIN_OBJ(element->text));
595 element->text = self->text;
596 Py_INCREF(JOIN_OBJ(element->text));
597
598 Py_DECREF(JOIN_OBJ(element->tail));
599 element->tail = self->tail;
600 Py_INCREF(JOIN_OBJ(element->tail));
601
602 if (self->extra) {
603
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000604 if (element_resize(element, self->extra->length) < 0) {
605 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000606 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000607 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000608
609 for (i = 0; i < self->extra->length; i++) {
610 Py_INCREF(self->extra->children[i]);
611 element->extra->children[i] = self->extra->children[i];
612 }
613
614 element->extra->length = self->extra->length;
615
616 }
617
618 return (PyObject*) element;
619}
620
621static PyObject*
622element_deepcopy(ElementObject* self, PyObject* args)
623{
624 int i;
625 ElementObject* element;
626 PyObject* tag;
627 PyObject* attrib;
628 PyObject* text;
629 PyObject* tail;
630 PyObject* id;
631
632 PyObject* memo;
633 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
634 return NULL;
635
636 tag = deepcopy(self->tag, memo);
637 if (!tag)
638 return NULL;
639
640 if (self->extra) {
641 attrib = deepcopy(self->extra->attrib, memo);
642 if (!attrib) {
643 Py_DECREF(tag);
644 return NULL;
645 }
646 } else {
647 Py_INCREF(Py_None);
648 attrib = Py_None;
649 }
650
651 element = (ElementObject*) element_new(tag, attrib);
652
653 Py_DECREF(tag);
654 Py_DECREF(attrib);
655
656 if (!element)
657 return NULL;
658
659 text = deepcopy(JOIN_OBJ(self->text), memo);
660 if (!text)
661 goto error;
662 Py_DECREF(element->text);
663 element->text = JOIN_SET(text, JOIN_GET(self->text));
664
665 tail = deepcopy(JOIN_OBJ(self->tail), memo);
666 if (!tail)
667 goto error;
668 Py_DECREF(element->tail);
669 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
670
671 if (self->extra) {
672
673 if (element_resize(element, self->extra->length) < 0)
674 goto error;
675
676 for (i = 0; i < self->extra->length; i++) {
677 PyObject* child = deepcopy(self->extra->children[i], memo);
678 if (!child) {
679 element->extra->length = i;
680 goto error;
681 }
682 element->extra->children[i] = child;
683 }
684
685 element->extra->length = self->extra->length;
686
687 }
688
689 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000690 id = PyLong_FromLong((Py_uintptr_t) self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000691
692 i = PyDict_SetItem(memo, id, (PyObject*) element);
693
694 Py_DECREF(id);
695
696 if (i < 0)
697 goto error;
698
699 return (PyObject*) element;
700
701 error:
702 Py_DECREF(element);
703 return NULL;
704}
705
706LOCAL(int)
707checkpath(PyObject* tag)
708{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000709 Py_ssize_t i;
710 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000711
712 /* check if a tag contains an xpath character */
713
714#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
715
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716 if (PyUnicode_Check(tag)) {
717 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
718 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
719 if (p[i] == '{')
720 check = 0;
721 else if (p[i] == '}')
722 check = 1;
723 else if (check && PATHCHAR(p[i]))
724 return 1;
725 }
726 return 0;
727 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000728 if (PyBytes_Check(tag)) {
729 char *p = PyBytes_AS_STRING(tag);
730 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000731 if (p[i] == '{')
732 check = 0;
733 else if (p[i] == '}')
734 check = 1;
735 else if (check && PATHCHAR(p[i]))
736 return 1;
737 }
738 return 0;
739 }
740
741 return 1; /* unknown type; might be path expression */
742}
743
744static PyObject*
745element_find(ElementObject* self, PyObject* args)
746{
747 int i;
748
749 PyObject* tag;
750 if (!PyArg_ParseTuple(args, "O:find", &tag))
751 return NULL;
752
753 if (checkpath(tag))
754 return PyObject_CallMethod(
755 elementpath_obj, "find", "OO", self, tag
756 );
757
758 if (!self->extra)
759 Py_RETURN_NONE;
760
761 for (i = 0; i < self->extra->length; i++) {
762 PyObject* item = self->extra->children[i];
763 if (Element_CheckExact(item) &&
764 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
765 Py_INCREF(item);
766 return item;
767 }
768 }
769
770 Py_RETURN_NONE;
771}
772
773static PyObject*
774element_findtext(ElementObject* self, PyObject* args)
775{
776 int i;
777
778 PyObject* tag;
779 PyObject* default_value = Py_None;
780 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
781 return NULL;
782
783 if (checkpath(tag))
784 return PyObject_CallMethod(
785 elementpath_obj, "findtext", "OOO", self, tag, default_value
786 );
787
788 if (!self->extra) {
789 Py_INCREF(default_value);
790 return default_value;
791 }
792
793 for (i = 0; i < self->extra->length; i++) {
794 ElementObject* item = (ElementObject*) self->extra->children[i];
795 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
796 PyObject* text = element_get_text(item);
797 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000798 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000799 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000800 return text;
801 }
802 }
803
804 Py_INCREF(default_value);
805 return default_value;
806}
807
808static PyObject*
809element_findall(ElementObject* self, PyObject* args)
810{
811 int i;
812 PyObject* out;
813
814 PyObject* tag;
815 if (!PyArg_ParseTuple(args, "O:findall", &tag))
816 return NULL;
817
818 if (checkpath(tag))
819 return PyObject_CallMethod(
820 elementpath_obj, "findall", "OO", self, tag
821 );
822
823 out = PyList_New(0);
824 if (!out)
825 return NULL;
826
827 if (!self->extra)
828 return out;
829
830 for (i = 0; i < self->extra->length; i++) {
831 PyObject* item = self->extra->children[i];
832 if (Element_CheckExact(item) &&
833 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
834 if (PyList_Append(out, item) < 0) {
835 Py_DECREF(out);
836 return NULL;
837 }
838 }
839 }
840
841 return out;
842}
843
844static PyObject*
845element_get(ElementObject* self, PyObject* args)
846{
847 PyObject* value;
848
849 PyObject* key;
850 PyObject* default_value = Py_None;
851 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
852 return NULL;
853
854 if (!self->extra || self->extra->attrib == Py_None)
855 value = default_value;
856 else {
857 value = PyDict_GetItem(self->extra->attrib, key);
858 if (!value)
859 value = default_value;
860 }
861
862 Py_INCREF(value);
863 return value;
864}
865
866static PyObject*
867element_getchildren(ElementObject* self, PyObject* args)
868{
869 int i;
870 PyObject* list;
871
872 if (!PyArg_ParseTuple(args, ":getchildren"))
873 return NULL;
874
875 if (!self->extra)
876 return PyList_New(0);
877
878 list = PyList_New(self->extra->length);
879 if (!list)
880 return NULL;
881
882 for (i = 0; i < self->extra->length; i++) {
883 PyObject* item = self->extra->children[i];
884 Py_INCREF(item);
885 PyList_SET_ITEM(list, i, item);
886 }
887
888 return list;
889}
890
891static PyObject*
892element_getiterator(ElementObject* self, PyObject* args)
893{
894 PyObject* result;
895
896 PyObject* tag = Py_None;
897 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
898 return NULL;
899
900 if (!elementtree_getiterator_obj) {
901 PyErr_SetString(
902 PyExc_RuntimeError,
903 "getiterator helper not found"
904 );
905 return NULL;
906 }
907
908 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000909 if (!args)
910 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000911
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000912 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
913 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
914
915 result = PyObject_CallObject(elementtree_getiterator_obj, args);
916
917 Py_DECREF(args);
918
919 return result;
920}
921
922static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000923element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000924{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000925 ElementObject* self = (ElementObject*) self_;
926
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000927 if (!self->extra || index < 0 || index >= self->extra->length) {
928 PyErr_SetString(
929 PyExc_IndexError,
930 "child index out of range"
931 );
932 return NULL;
933 }
934
935 Py_INCREF(self->extra->children[index]);
936 return self->extra->children[index];
937}
938
939static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000940element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000941{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000942 ElementObject* self = (ElementObject*) self_;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000943 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000944 PyObject* list;
945
946 if (!self->extra)
947 return PyList_New(0);
948
949 /* standard clamping */
950 if (start < 0)
951 start = 0;
952 if (end < 0)
953 end = 0;
954 if (end > self->extra->length)
955 end = self->extra->length;
956 if (start > end)
957 start = end;
958
959 list = PyList_New(end - start);
960 if (!list)
961 return NULL;
962
963 for (i = start; i < end; i++) {
964 PyObject* item = self->extra->children[i];
965 Py_INCREF(item);
966 PyList_SET_ITEM(list, i - start, item);
967 }
968
969 return list;
970}
971
972static PyObject*
973element_insert(ElementObject* self, PyObject* args)
974{
975 int i;
976
977 int index;
978 PyObject* element;
979 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
980 &Element_Type, &element))
981 return NULL;
982
983 if (!self->extra)
984 element_new_extra(self, NULL);
985
986 if (index < 0)
987 index = 0;
988 if (index > self->extra->length)
989 index = self->extra->length;
990
991 if (element_resize(self, 1) < 0)
992 return NULL;
993
994 for (i = self->extra->length; i > index; i--)
995 self->extra->children[i] = self->extra->children[i-1];
996
997 Py_INCREF(element);
998 self->extra->children[index] = element;
999
1000 self->extra->length++;
1001
1002 Py_RETURN_NONE;
1003}
1004
1005static PyObject*
1006element_items(ElementObject* self, PyObject* args)
1007{
1008 if (!PyArg_ParseTuple(args, ":items"))
1009 return NULL;
1010
1011 if (!self->extra || self->extra->attrib == Py_None)
1012 return PyList_New(0);
1013
1014 return PyDict_Items(self->extra->attrib);
1015}
1016
1017static PyObject*
1018element_keys(ElementObject* self, PyObject* args)
1019{
1020 if (!PyArg_ParseTuple(args, ":keys"))
1021 return NULL;
1022
1023 if (!self->extra || self->extra->attrib == Py_None)
1024 return PyList_New(0);
1025
1026 return PyDict_Keys(self->extra->attrib);
1027}
1028
Martin v. Löwis18e16552006-02-15 17:27:45 +00001029static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001030element_length(ElementObject* self)
1031{
1032 if (!self->extra)
1033 return 0;
1034
1035 return self->extra->length;
1036}
1037
1038static PyObject*
1039element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1040{
1041 PyObject* elem;
1042
1043 PyObject* tag;
1044 PyObject* attrib;
1045 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1046 return NULL;
1047
1048 attrib = PyDict_Copy(attrib);
1049 if (!attrib)
1050 return NULL;
1051
1052 elem = element_new(tag, attrib);
1053
1054 Py_DECREF(attrib);
1055
1056 return elem;
1057}
1058
1059static PyObject*
1060element_reduce(ElementObject* self, PyObject* args)
1061{
1062 if (!PyArg_ParseTuple(args, ":__reduce__"))
1063 return NULL;
1064
1065 /* Hack alert: This method is used to work around a __copy__
1066 problem on certain 2.3 and 2.4 versions. To save time and
1067 simplify the code, we create the copy in here, and use a dummy
1068 copyelement helper to trick the copy module into doing the
1069 right thing. */
1070
1071 if (!elementtree_copyelement_obj) {
1072 PyErr_SetString(
1073 PyExc_RuntimeError,
1074 "copyelement helper not found"
1075 );
1076 return NULL;
1077 }
1078
1079 return Py_BuildValue(
1080 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1081 );
1082}
1083
1084static PyObject*
1085element_remove(ElementObject* self, PyObject* args)
1086{
1087 int i;
1088
1089 PyObject* element;
1090 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1091 return NULL;
1092
1093 if (!self->extra) {
1094 /* element has no children, so raise exception */
1095 PyErr_SetString(
1096 PyExc_ValueError,
1097 "list.remove(x): x not in list"
1098 );
1099 return NULL;
1100 }
1101
1102 for (i = 0; i < self->extra->length; i++) {
1103 if (self->extra->children[i] == element)
1104 break;
1105 if (PyObject_Compare(self->extra->children[i], element) == 0)
1106 break;
1107 }
1108
1109 if (i == self->extra->length) {
1110 /* element is not in children, so raise exception */
1111 PyErr_SetString(
1112 PyExc_ValueError,
1113 "list.remove(x): x not in list"
1114 );
1115 return NULL;
1116 }
1117
1118 Py_DECREF(self->extra->children[i]);
1119
1120 self->extra->length--;
1121
1122 for (; i < self->extra->length; i++)
1123 self->extra->children[i] = self->extra->children[i+1];
1124
1125 Py_RETURN_NONE;
1126}
1127
1128static PyObject*
1129element_repr(ElementObject* self)
1130{
Walter Dörwald7569dfe2007-05-19 21:49:49 +00001131 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001132}
1133
1134static PyObject*
1135element_set(ElementObject* self, PyObject* args)
1136{
1137 PyObject* attrib;
1138
1139 PyObject* key;
1140 PyObject* value;
1141 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1142 return NULL;
1143
1144 if (!self->extra)
1145 element_new_extra(self, NULL);
1146
1147 attrib = element_get_attrib(self);
1148 if (!attrib)
1149 return NULL;
1150
1151 if (PyDict_SetItem(attrib, key, value) < 0)
1152 return NULL;
1153
1154 Py_RETURN_NONE;
1155}
1156
1157static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001158element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001159{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001160 ElementObject* self = (ElementObject*) self_;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001161 Py_ssize_t i, new, old;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001162 PyObject* recycle = NULL;
1163
1164 if (!self->extra)
1165 element_new_extra(self, NULL);
1166
1167 /* standard clamping */
1168 if (start < 0)
1169 start = 0;
1170 if (end < 0)
1171 end = 0;
1172 if (end > self->extra->length)
1173 end = self->extra->length;
1174 if (start > end)
1175 start = end;
1176
1177 old = end - start;
1178
1179 if (item == NULL)
1180 new = 0;
1181 else if (PyList_CheckExact(item)) {
1182 new = PyList_GET_SIZE(item);
1183 } else {
1184 /* FIXME: support arbitrary sequences? */
1185 PyErr_Format(
1186 PyExc_TypeError,
Christian Heimes90aa7642007-12-19 02:45:37 +00001187 "expected list, not \"%.200s\"", Py_TYPE(item)->tp_name
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001188 );
1189 return -1;
1190 }
1191
1192 if (old > 0) {
1193 /* to avoid recursive calls to this method (via decref), move
1194 old items to the recycle bin here, and get rid of them when
1195 we're done modifying the element */
1196 recycle = PyList_New(old);
1197 for (i = 0; i < old; i++)
1198 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1199 }
1200
1201 if (new < old) {
1202 /* delete slice */
1203 for (i = end; i < self->extra->length; i++)
1204 self->extra->children[i + new - old] = self->extra->children[i];
1205 } else if (new > old) {
1206 /* insert slice */
1207 if (element_resize(self, new - old) < 0)
1208 return -1;
1209 for (i = self->extra->length-1; i >= end; i--)
1210 self->extra->children[i + new - old] = self->extra->children[i];
1211 }
1212
1213 /* replace the slice */
1214 for (i = 0; i < new; i++) {
1215 PyObject* element = PyList_GET_ITEM(item, i);
1216 Py_INCREF(element);
1217 self->extra->children[i + start] = element;
1218 }
1219
1220 self->extra->length += new - old;
1221
1222 /* discard the recycle bin, and everything in it */
1223 Py_XDECREF(recycle);
1224
1225 return 0;
1226}
1227
1228static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001229element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001230{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001231 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001232 int i;
1233 PyObject* old;
1234
1235 if (!self->extra || index < 0 || index >= self->extra->length) {
1236 PyErr_SetString(
1237 PyExc_IndexError,
1238 "child assignment index out of range");
1239 return -1;
1240 }
1241
1242 old = self->extra->children[index];
1243
1244 if (item) {
1245 Py_INCREF(item);
1246 self->extra->children[index] = item;
1247 } else {
1248 self->extra->length--;
1249 for (i = index; i < self->extra->length; i++)
1250 self->extra->children[i] = self->extra->children[i+1];
1251 }
1252
1253 Py_DECREF(old);
1254
1255 return 0;
1256}
1257
1258static PyMethodDef element_methods[] = {
1259
1260 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1261
1262 {"get", (PyCFunction) element_get, METH_VARARGS},
1263 {"set", (PyCFunction) element_set, METH_VARARGS},
1264
1265 {"find", (PyCFunction) element_find, METH_VARARGS},
1266 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1267 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1268
1269 {"append", (PyCFunction) element_append, METH_VARARGS},
1270 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1271 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1272
1273 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1274 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1275
1276 {"items", (PyCFunction) element_items, METH_VARARGS},
1277 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1278
1279 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1280
1281 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1282 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1283
1284 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1285 C objects correctly, so we have to fake it using a __reduce__-
1286 based hack (see the element_reduce implementation above for
1287 details). */
1288
1289 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1290 using a runtime test to figure out if we need to fake things
1291 or now (see the init code below). The following entry is
1292 enabled only if the hack is needed. */
1293
1294 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1295
1296 {NULL, NULL}
1297};
1298
1299static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001300element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001301{
1302 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001303 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001304
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001305 if (PyUnicode_Check(nameobj))
1306 name = PyUnicode_AsString(nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001307
1308 if (strcmp(name, "tag") == 0)
1309 res = self->tag;
1310 else if (strcmp(name, "text") == 0)
1311 res = element_get_text(self);
1312 else if (strcmp(name, "tail") == 0) {
1313 res = element_get_tail(self);
1314 } else if (strcmp(name, "attrib") == 0) {
1315 if (!self->extra)
1316 element_new_extra(self, NULL);
1317 res = element_get_attrib(self);
1318 } else {
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001319 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001320 }
1321
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001322 Py_XINCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001323 return res;
1324}
1325
1326static int
1327element_setattr(ElementObject* self, const char* name, PyObject* value)
1328{
1329 if (value == NULL) {
1330 PyErr_SetString(
1331 PyExc_AttributeError,
1332 "can't delete element attributes"
1333 );
1334 return -1;
1335 }
1336
1337 if (strcmp(name, "tag") == 0) {
1338 Py_DECREF(self->tag);
1339 self->tag = value;
1340 Py_INCREF(self->tag);
1341 } else if (strcmp(name, "text") == 0) {
1342 Py_DECREF(JOIN_OBJ(self->text));
1343 self->text = value;
1344 Py_INCREF(self->text);
1345 } else if (strcmp(name, "tail") == 0) {
1346 Py_DECREF(JOIN_OBJ(self->tail));
1347 self->tail = value;
1348 Py_INCREF(self->tail);
1349 } else if (strcmp(name, "attrib") == 0) {
1350 if (!self->extra)
1351 element_new_extra(self, NULL);
1352 Py_DECREF(self->extra->attrib);
1353 self->extra->attrib = value;
1354 Py_INCREF(self->extra->attrib);
1355 } else {
1356 PyErr_SetString(PyExc_AttributeError, name);
1357 return -1;
1358 }
1359
1360 return 0;
1361}
1362
1363static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001364 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001365 0, /* sq_concat */
1366 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001367 element_getitem,
1368 element_getslice,
1369 element_setitem,
1370 element_setslice,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001371};
1372
Neal Norwitz227b5332006-03-22 09:28:35 +00001373static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001374 PyVarObject_HEAD_INIT(NULL, 0)
1375 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001376 /* methods */
1377 (destructor)element_dealloc, /* tp_dealloc */
1378 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001379 0, /* tp_getattr */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001380 (setattrfunc)element_setattr, /* tp_setattr */
1381 0, /* tp_compare */
1382 (reprfunc)element_repr, /* tp_repr */
1383 0, /* tp_as_number */
1384 &element_as_sequence, /* tp_as_sequence */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001385 0, /* tp_as_mapping */
1386 0, /* tp_hash */
1387 0, /* tp_call */
1388 0, /* tp_str */
1389 (getattrofunc)element_getattro, /* tp_getattro */
1390 0, /* tp_setattro */
1391 0, /* tp_as_buffer */
1392 Py_TPFLAGS_DEFAULT, /* tp_flags */
1393 0, /* tp_doc */
1394 0, /* tp_traverse */
1395 0, /* tp_clear */
1396 0, /* tp_richcompare */
1397 0, /* tp_weaklistoffset */
1398 0, /* tp_iter */
1399 0, /* tp_iternext */
1400 element_methods, /* tp_methods */
1401 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001402};
1403
1404/* ==================================================================== */
1405/* the tree builder type */
1406
1407typedef struct {
1408 PyObject_HEAD
1409
1410 PyObject* root; /* root node (first created node) */
1411
1412 ElementObject* this; /* current node */
1413 ElementObject* last; /* most recently created node */
1414
1415 PyObject* data; /* data collector (string or list), or NULL */
1416
1417 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001418 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001419
1420 /* element tracing */
1421 PyObject* events; /* list of events, or NULL if not collecting */
1422 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1423 PyObject* end_event_obj;
1424 PyObject* start_ns_event_obj;
1425 PyObject* end_ns_event_obj;
1426
1427} TreeBuilderObject;
1428
Neal Norwitz227b5332006-03-22 09:28:35 +00001429static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001430
Christian Heimes90aa7642007-12-19 02:45:37 +00001431#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001432
1433/* -------------------------------------------------------------------- */
1434/* constructor and destructor */
1435
1436LOCAL(PyObject*)
1437treebuilder_new(void)
1438{
1439 TreeBuilderObject* self;
1440
1441 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1442 if (self == NULL)
1443 return NULL;
1444
1445 self->root = NULL;
1446
1447 Py_INCREF(Py_None);
1448 self->this = (ElementObject*) Py_None;
1449
1450 Py_INCREF(Py_None);
1451 self->last = (ElementObject*) Py_None;
1452
1453 self->data = NULL;
1454
1455 self->stack = PyList_New(20);
1456 self->index = 0;
1457
1458 self->events = NULL;
1459 self->start_event_obj = self->end_event_obj = NULL;
1460 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1461
1462 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1463
1464 return (PyObject*) self;
1465}
1466
1467static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001468treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001469{
1470 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1471 return NULL;
1472
1473 return treebuilder_new();
1474}
1475
1476static void
1477treebuilder_dealloc(TreeBuilderObject* self)
1478{
1479 Py_XDECREF(self->end_ns_event_obj);
1480 Py_XDECREF(self->start_ns_event_obj);
1481 Py_XDECREF(self->end_event_obj);
1482 Py_XDECREF(self->start_event_obj);
1483 Py_XDECREF(self->events);
1484 Py_DECREF(self->stack);
1485 Py_XDECREF(self->data);
1486 Py_DECREF(self->last);
1487 Py_DECREF(self->this);
1488 Py_XDECREF(self->root);
1489
1490 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1491
1492 PyObject_Del(self);
1493}
1494
1495/* -------------------------------------------------------------------- */
1496/* handlers */
1497
1498LOCAL(PyObject*)
1499treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1500 PyObject* standalone)
1501{
1502 Py_RETURN_NONE;
1503}
1504
1505LOCAL(PyObject*)
1506treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1507 PyObject* attrib)
1508{
1509 PyObject* node;
1510 PyObject* this;
1511
1512 if (self->data) {
1513 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001514 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001515 self->last->text = JOIN_SET(
1516 self->data, PyList_CheckExact(self->data)
1517 );
1518 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001519 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001520 self->last->tail = JOIN_SET(
1521 self->data, PyList_CheckExact(self->data)
1522 );
1523 }
1524 self->data = NULL;
1525 }
1526
1527 node = element_new(tag, attrib);
1528 if (!node)
1529 return NULL;
1530
1531 this = (PyObject*) self->this;
1532
1533 if (this != Py_None) {
1534 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001535 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001536 } else {
1537 if (self->root) {
1538 PyErr_SetString(
1539 PyExc_SyntaxError,
1540 "multiple elements on top level"
1541 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001542 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001543 }
1544 Py_INCREF(node);
1545 self->root = node;
1546 }
1547
1548 if (self->index < PyList_GET_SIZE(self->stack)) {
1549 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001550 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001551 Py_INCREF(this);
1552 } else {
1553 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001554 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001555 }
1556 self->index++;
1557
1558 Py_DECREF(this);
1559 Py_INCREF(node);
1560 self->this = (ElementObject*) node;
1561
1562 Py_DECREF(self->last);
1563 Py_INCREF(node);
1564 self->last = (ElementObject*) node;
1565
1566 if (self->start_event_obj) {
1567 PyObject* res;
1568 PyObject* action = self->start_event_obj;
1569 res = PyTuple_New(2);
1570 if (res) {
1571 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1572 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1573 PyList_Append(self->events, res);
1574 Py_DECREF(res);
1575 } else
1576 PyErr_Clear(); /* FIXME: propagate error */
1577 }
1578
1579 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001580
1581 error:
1582 Py_DECREF(node);
1583 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001584}
1585
1586LOCAL(PyObject*)
1587treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1588{
1589 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001590 if (self->last == (ElementObject*) Py_None) {
1591 /* ignore calls to data before the first call to start */
1592 Py_RETURN_NONE;
1593 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001594 /* store the first item as is */
1595 Py_INCREF(data); self->data = data;
1596 } else {
1597 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001598 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1599 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001600 /* expat often generates single character data sections; handle
1601 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001602 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1603 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001604 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001605 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001606 } else if (PyList_CheckExact(self->data)) {
1607 if (PyList_Append(self->data, data) < 0)
1608 return NULL;
1609 } else {
1610 PyObject* list = PyList_New(2);
1611 if (!list)
1612 return NULL;
1613 PyList_SET_ITEM(list, 0, self->data);
1614 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1615 self->data = list;
1616 }
1617 }
1618
1619 Py_RETURN_NONE;
1620}
1621
1622LOCAL(PyObject*)
1623treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1624{
1625 PyObject* item;
1626
1627 if (self->data) {
1628 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001629 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001630 self->last->text = JOIN_SET(
1631 self->data, PyList_CheckExact(self->data)
1632 );
1633 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001634 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001635 self->last->tail = JOIN_SET(
1636 self->data, PyList_CheckExact(self->data)
1637 );
1638 }
1639 self->data = NULL;
1640 }
1641
1642 if (self->index == 0) {
1643 PyErr_SetString(
1644 PyExc_IndexError,
1645 "pop from empty stack"
1646 );
1647 return NULL;
1648 }
1649
1650 self->index--;
1651
1652 item = PyList_GET_ITEM(self->stack, self->index);
1653 Py_INCREF(item);
1654
1655 Py_DECREF(self->last);
1656
1657 self->last = (ElementObject*) self->this;
1658 self->this = (ElementObject*) item;
1659
1660 if (self->end_event_obj) {
1661 PyObject* res;
1662 PyObject* action = self->end_event_obj;
1663 PyObject* node = (PyObject*) self->last;
1664 res = PyTuple_New(2);
1665 if (res) {
1666 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1667 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1668 PyList_Append(self->events, res);
1669 Py_DECREF(res);
1670 } else
1671 PyErr_Clear(); /* FIXME: propagate error */
1672 }
1673
1674 Py_INCREF(self->last);
1675 return (PyObject*) self->last;
1676}
1677
1678LOCAL(void)
1679treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1680 const char* prefix, const char *uri)
1681{
1682 PyObject* res;
1683 PyObject* action;
1684 PyObject* parcel;
1685
1686 if (!self->events)
1687 return;
1688
1689 if (start) {
1690 if (!self->start_ns_event_obj)
1691 return;
1692 action = self->start_ns_event_obj;
1693 /* FIXME: prefix and uri use utf-8 encoding! */
1694 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1695 if (!parcel)
1696 return;
1697 Py_INCREF(action);
1698 } else {
1699 if (!self->end_ns_event_obj)
1700 return;
1701 action = self->end_ns_event_obj;
1702 Py_INCREF(action);
1703 parcel = Py_None;
1704 Py_INCREF(parcel);
1705 }
1706
1707 res = PyTuple_New(2);
1708
1709 if (res) {
1710 PyTuple_SET_ITEM(res, 0, action);
1711 PyTuple_SET_ITEM(res, 1, parcel);
1712 PyList_Append(self->events, res);
1713 Py_DECREF(res);
1714 } else
1715 PyErr_Clear(); /* FIXME: propagate error */
1716}
1717
1718/* -------------------------------------------------------------------- */
1719/* methods (in alphabetical order) */
1720
1721static PyObject*
1722treebuilder_data(TreeBuilderObject* self, PyObject* args)
1723{
1724 PyObject* data;
1725 if (!PyArg_ParseTuple(args, "O:data", &data))
1726 return NULL;
1727
1728 return treebuilder_handle_data(self, data);
1729}
1730
1731static PyObject*
1732treebuilder_end(TreeBuilderObject* self, PyObject* args)
1733{
1734 PyObject* tag;
1735 if (!PyArg_ParseTuple(args, "O:end", &tag))
1736 return NULL;
1737
1738 return treebuilder_handle_end(self, tag);
1739}
1740
1741LOCAL(PyObject*)
1742treebuilder_done(TreeBuilderObject* self)
1743{
1744 PyObject* res;
1745
1746 /* FIXME: check stack size? */
1747
1748 if (self->root)
1749 res = self->root;
1750 else
1751 res = Py_None;
1752
1753 Py_INCREF(res);
1754 return res;
1755}
1756
1757static PyObject*
1758treebuilder_close(TreeBuilderObject* self, PyObject* args)
1759{
1760 if (!PyArg_ParseTuple(args, ":close"))
1761 return NULL;
1762
1763 return treebuilder_done(self);
1764}
1765
1766static PyObject*
1767treebuilder_start(TreeBuilderObject* self, PyObject* args)
1768{
1769 PyObject* tag;
1770 PyObject* attrib = Py_None;
1771 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1772 return NULL;
1773
1774 return treebuilder_handle_start(self, tag, attrib);
1775}
1776
1777static PyObject*
1778treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1779{
1780 PyObject* encoding;
1781 PyObject* standalone;
1782 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1783 return NULL;
1784
1785 return treebuilder_handle_xml(self, encoding, standalone);
1786}
1787
1788static PyMethodDef treebuilder_methods[] = {
1789 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1790 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1791 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1792 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1793 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1794 {NULL, NULL}
1795};
1796
Neal Norwitz227b5332006-03-22 09:28:35 +00001797static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001798 PyVarObject_HEAD_INIT(NULL, 0)
1799 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001800 /* methods */
1801 (destructor)treebuilder_dealloc, /* tp_dealloc */
1802 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001803 0, /* tp_getattr */
1804 0, /* tp_setattr */
1805 0, /* tp_compare */
1806 0, /* tp_repr */
1807 0, /* tp_as_number */
1808 0, /* tp_as_sequence */
1809 0, /* tp_as_mapping */
1810 0, /* tp_hash */
1811 0, /* tp_call */
1812 0, /* tp_str */
1813 0, /* tp_getattro */
1814 0, /* tp_setattro */
1815 0, /* tp_as_buffer */
1816 Py_TPFLAGS_DEFAULT, /* tp_flags */
1817 0, /* tp_doc */
1818 0, /* tp_traverse */
1819 0, /* tp_clear */
1820 0, /* tp_richcompare */
1821 0, /* tp_weaklistoffset */
1822 0, /* tp_iter */
1823 0, /* tp_iternext */
1824 treebuilder_methods, /* tp_methods */
1825 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001826};
1827
1828/* ==================================================================== */
1829/* the expat interface */
1830
1831#if defined(USE_EXPAT)
1832
1833#include "expat.h"
1834
/* EXPAT(name) resolves an expat entry point: through the pyexpat
   dispatch table when USE_PYEXPAT_CAPI is defined, otherwise as a
   direct XML_name call. */
#ifdef USE_PYEXPAT_CAPI
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
1842
1843typedef struct {
1844 PyObject_HEAD
1845
1846 XML_Parser parser;
1847
1848 PyObject* target;
1849 PyObject* entity;
1850
1851 PyObject* names;
1852
1853 PyObject* handle_xml;
1854 PyObject* handle_start;
1855 PyObject* handle_data;
1856 PyObject* handle_end;
1857
1858 PyObject* handle_comment;
1859 PyObject* handle_pi;
1860
1861} XMLParserObject;
1862
Neal Norwitz227b5332006-03-22 09:28:35 +00001863static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001864
1865/* helpers */
1866
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001867LOCAL(PyObject*)
1868makeuniversal(XMLParserObject* self, const char* string)
1869{
1870 /* convert a UTF-8 tag/attribute name from the expat parser
1871 to a universal name string */
1872
1873 int size = strlen(string);
1874 PyObject* key;
1875 PyObject* value;
1876
1877 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00001878 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001879 if (!key)
1880 return NULL;
1881
1882 value = PyDict_GetItem(self->names, key);
1883
1884 if (value) {
1885 Py_INCREF(value);
1886 } else {
1887 /* new name. convert to universal name, and decode as
1888 necessary */
1889
1890 PyObject* tag;
1891 char* p;
1892 int i;
1893
1894 /* look for namespace separator */
1895 for (i = 0; i < size; i++)
1896 if (string[i] == '}')
1897 break;
1898 if (i != size) {
1899 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00001900 tag = PyBytes_FromStringAndSize(NULL, size+1);
1901 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001902 p[0] = '{';
1903 memcpy(p+1, string, size);
1904 size++;
1905 } else {
1906 /* plain name; use key as tag */
1907 Py_INCREF(key);
1908 tag = key;
1909 }
1910
1911 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00001912 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00001913 value = PyUnicode_DecodeUTF8(p, size, "strict");
1914 Py_DECREF(tag);
1915 if (!value) {
1916 Py_DECREF(key);
1917 return NULL;
1918 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001919
1920 /* add to names dictionary */
1921 if (PyDict_SetItem(self->names, key, value) < 0) {
1922 Py_DECREF(key);
1923 Py_DECREF(value);
1924 return NULL;
1925 }
1926 }
1927
1928 Py_DECREF(key);
1929 return value;
1930}
1931
1932/* -------------------------------------------------------------------- */
1933/* handlers */
1934
1935static void
1936expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1937 int data_len)
1938{
1939 PyObject* key;
1940 PyObject* value;
1941 PyObject* res;
1942
1943 if (data_len < 2 || data_in[0] != '&')
1944 return;
1945
Neal Norwitz0269b912007-08-08 06:56:02 +00001946 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001947 if (!key)
1948 return;
1949
1950 value = PyDict_GetItem(self->entity, key);
1951
1952 if (value) {
1953 if (TreeBuilder_CheckExact(self->target))
1954 res = treebuilder_handle_data(
1955 (TreeBuilderObject*) self->target, value
1956 );
1957 else if (self->handle_data)
1958 res = PyObject_CallFunction(self->handle_data, "O", value);
1959 else
1960 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001961 Py_XDECREF(res);
1962 } else {
1963 PyErr_Format(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001964 PyExc_SyntaxError, "undefined entity &%s;: line %ld, column %ld",
Christian Heimes72b710a2008-05-26 13:28:38 +00001965 PyBytes_AS_STRING(key),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001966 EXPAT(GetErrorLineNumber)(self->parser),
1967 EXPAT(GetErrorColumnNumber)(self->parser)
1968 );
1969 }
1970
1971 Py_DECREF(key);
1972}
1973
1974static void
1975expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
1976 const XML_Char **attrib_in)
1977{
1978 PyObject* res;
1979 PyObject* tag;
1980 PyObject* attrib;
1981 int ok;
1982
1983 /* tag name */
1984 tag = makeuniversal(self, tag_in);
1985 if (!tag)
1986 return; /* parser will look for errors */
1987
1988 /* attributes */
1989 if (attrib_in[0]) {
1990 attrib = PyDict_New();
1991 if (!attrib)
1992 return;
1993 while (attrib_in[0] && attrib_in[1]) {
1994 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00001995 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001996 if (!key || !value) {
1997 Py_XDECREF(value);
1998 Py_XDECREF(key);
1999 Py_DECREF(attrib);
2000 return;
2001 }
2002 ok = PyDict_SetItem(attrib, key, value);
2003 Py_DECREF(value);
2004 Py_DECREF(key);
2005 if (ok < 0) {
2006 Py_DECREF(attrib);
2007 return;
2008 }
2009 attrib_in += 2;
2010 }
2011 } else {
2012 Py_INCREF(Py_None);
2013 attrib = Py_None;
2014 }
2015
2016 if (TreeBuilder_CheckExact(self->target))
2017 /* shortcut */
2018 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2019 tag, attrib);
2020 else if (self->handle_start)
2021 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2022 else
2023 res = NULL;
2024
2025 Py_DECREF(tag);
2026 Py_DECREF(attrib);
2027
2028 Py_XDECREF(res);
2029}
2030
2031static void
2032expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2033 int data_len)
2034{
2035 PyObject* data;
2036 PyObject* res;
2037
Neal Norwitz0269b912007-08-08 06:56:02 +00002038 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002039 if (!data)
2040 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002041
2042 if (TreeBuilder_CheckExact(self->target))
2043 /* shortcut */
2044 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2045 else if (self->handle_data)
2046 res = PyObject_CallFunction(self->handle_data, "O", data);
2047 else
2048 res = NULL;
2049
2050 Py_DECREF(data);
2051
2052 Py_XDECREF(res);
2053}
2054
2055static void
2056expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2057{
2058 PyObject* tag;
2059 PyObject* res = NULL;
2060
2061 if (TreeBuilder_CheckExact(self->target))
2062 /* shortcut */
2063 /* the standard tree builder doesn't look at the end tag */
2064 res = treebuilder_handle_end(
2065 (TreeBuilderObject*) self->target, Py_None
2066 );
2067 else if (self->handle_end) {
2068 tag = makeuniversal(self, tag_in);
2069 if (tag) {
2070 res = PyObject_CallFunction(self->handle_end, "O", tag);
2071 Py_DECREF(tag);
2072 }
2073 }
2074
2075 Py_XDECREF(res);
2076}
2077
2078static void
2079expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2080 const XML_Char *uri)
2081{
2082 treebuilder_handle_namespace(
2083 (TreeBuilderObject*) self->target, 1, prefix, uri
2084 );
2085}
2086
2087static void
2088expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2089{
2090 treebuilder_handle_namespace(
2091 (TreeBuilderObject*) self->target, 0, NULL, NULL
2092 );
2093}
2094
2095static void
2096expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2097{
2098 PyObject* comment;
2099 PyObject* res;
2100
2101 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002102 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002103 if (comment) {
2104 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2105 Py_XDECREF(res);
2106 Py_DECREF(comment);
2107 }
2108 }
2109}
2110
2111static void
2112expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2113 const XML_Char* data_in)
2114{
2115 PyObject* target;
2116 PyObject* data;
2117 PyObject* res;
2118
2119 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002120 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2121 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002122 if (target && data) {
2123 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2124 Py_XDECREF(res);
2125 Py_DECREF(data);
2126 Py_DECREF(target);
2127 } else {
2128 Py_XDECREF(data);
2129 Py_XDECREF(target);
2130 }
2131 }
2132}
2133
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002134static int
2135expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2136 XML_Encoding *info)
2137{
2138 PyObject* u;
2139 Py_UNICODE* p;
2140 unsigned char s[256];
2141 int i;
2142
2143 memset(info, 0, sizeof(XML_Encoding));
2144
2145 for (i = 0; i < 256; i++)
2146 s[i] = i;
2147
Fredrik Lundhc3389992005-12-25 11:40:19 +00002148 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002149 if (!u)
2150 return XML_STATUS_ERROR;
2151
2152 if (PyUnicode_GET_SIZE(u) != 256) {
2153 Py_DECREF(u);
2154 return XML_STATUS_ERROR;
2155 }
2156
2157 p = PyUnicode_AS_UNICODE(u);
2158
2159 for (i = 0; i < 256; i++) {
2160 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2161 info->map[i] = p[i];
2162 else
2163 info->map[i] = -1;
2164 }
2165
2166 Py_DECREF(u);
2167
2168 return XML_STATUS_OK;
2169}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002170
2171/* -------------------------------------------------------------------- */
2172/* constructor and destructor */
2173
2174static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002175xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002176{
2177 XMLParserObject* self;
2178 /* FIXME: does this need to be static? */
2179 static XML_Memory_Handling_Suite memory_handler;
2180
2181 PyObject* target = NULL;
2182 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002183 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002184 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2185 &target, &encoding))
2186 return NULL;
2187
2188#if defined(USE_PYEXPAT_CAPI)
2189 if (!expat_capi) {
2190 PyErr_SetString(
2191 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2192 );
2193 return NULL;
2194 }
2195#endif
2196
2197 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2198 if (self == NULL)
2199 return NULL;
2200
2201 self->entity = PyDict_New();
2202 if (!self->entity) {
2203 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002204 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002205 }
2206
2207 self->names = PyDict_New();
2208 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002209 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002210 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002211 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002212 }
2213
2214 memory_handler.malloc_fcn = PyObject_Malloc;
2215 memory_handler.realloc_fcn = PyObject_Realloc;
2216 memory_handler.free_fcn = PyObject_Free;
2217
2218 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2219 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002220 PyObject_Del(self->names);
2221 PyObject_Del(self->entity);
2222 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002223 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002224 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002225 }
2226
2227 /* setup target handlers */
2228 if (!target) {
2229 target = treebuilder_new();
2230 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002231 EXPAT(ParserFree)(self->parser);
2232 PyObject_Del(self->names);
2233 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002234 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002235 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002236 }
2237 } else
2238 Py_INCREF(target);
2239 self->target = target;
2240
2241 self->handle_xml = PyObject_GetAttrString(target, "xml");
2242 self->handle_start = PyObject_GetAttrString(target, "start");
2243 self->handle_data = PyObject_GetAttrString(target, "data");
2244 self->handle_end = PyObject_GetAttrString(target, "end");
2245 self->handle_comment = PyObject_GetAttrString(target, "comment");
2246 self->handle_pi = PyObject_GetAttrString(target, "pi");
2247
2248 PyErr_Clear();
2249
2250 /* configure parser */
2251 EXPAT(SetUserData)(self->parser, self);
2252 EXPAT(SetElementHandler)(
2253 self->parser,
2254 (XML_StartElementHandler) expat_start_handler,
2255 (XML_EndElementHandler) expat_end_handler
2256 );
2257 EXPAT(SetDefaultHandlerExpand)(
2258 self->parser,
2259 (XML_DefaultHandler) expat_default_handler
2260 );
2261 EXPAT(SetCharacterDataHandler)(
2262 self->parser,
2263 (XML_CharacterDataHandler) expat_data_handler
2264 );
2265 if (self->handle_comment)
2266 EXPAT(SetCommentHandler)(
2267 self->parser,
2268 (XML_CommentHandler) expat_comment_handler
2269 );
2270 if (self->handle_pi)
2271 EXPAT(SetProcessingInstructionHandler)(
2272 self->parser,
2273 (XML_ProcessingInstructionHandler) expat_pi_handler
2274 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002275 EXPAT(SetUnknownEncodingHandler)(
2276 self->parser,
2277 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2278 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002279
2280 ALLOC(sizeof(XMLParserObject), "create expatparser");
2281
2282 return (PyObject*) self;
2283}
2284
/* Deallocator for XMLParser objects: frees the expat parser, drops the
   references held in the object's fields, then releases the object. */
static void
xmlparser_dealloc(XMLParserObject* self)
{
    EXPAT(ParserFree)(self->parser);

    /* handler slots are NULL when the target did not provide the method,
       hence the X variants */
    Py_XDECREF(self->handle_pi);
    Py_XDECREF(self->handle_comment);
    Py_XDECREF(self->handle_end);
    Py_XDECREF(self->handle_data);
    Py_XDECREF(self->handle_start);
    Py_XDECREF(self->handle_xml);

    /* these are released unconditionally, i.e. they are expected to be
       non-NULL for every object that reaches this function */
    Py_DECREF(self->target);
    Py_DECREF(self->entity);
    Py_DECREF(self->names);

    /* counterpart of the ALLOC() bookkeeping call in the constructor */
    RELEASE(sizeof(XMLParserObject), "destroy expatparser");

    PyObject_Del(self);
}
2305
2306/* -------------------------------------------------------------------- */
2307/* methods (in alphabetical order) */
2308
2309LOCAL(PyObject*)
2310expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2311{
2312 int ok;
2313
2314 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2315
2316 if (PyErr_Occurred())
2317 return NULL;
2318
2319 if (!ok) {
2320 PyErr_Format(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002321 PyExc_SyntaxError, "%s: line %ld, column %ld",
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002322 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2323 EXPAT(GetErrorLineNumber)(self->parser),
2324 EXPAT(GetErrorColumnNumber)(self->parser)
2325 );
2326 return NULL;
2327 }
2328
2329 Py_RETURN_NONE;
2330}
2331
2332static PyObject*
2333xmlparser_close(XMLParserObject* self, PyObject* args)
2334{
2335 /* end feeding data to parser */
2336
2337 PyObject* res;
2338 if (!PyArg_ParseTuple(args, ":close"))
2339 return NULL;
2340
2341 res = expat_parse(self, "", 0, 1);
2342
2343 if (res && TreeBuilder_CheckExact(self->target)) {
2344 Py_DECREF(res);
2345 return treebuilder_done((TreeBuilderObject*) self->target);
2346 }
2347
2348 return res;
2349}
2350
2351static PyObject*
2352xmlparser_feed(XMLParserObject* self, PyObject* args)
2353{
2354 /* feed data to parser */
2355
2356 char* data;
2357 int data_len;
2358 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2359 return NULL;
2360
2361 return expat_parse(self, data, data_len, 0);
2362}
2363
2364static PyObject*
2365xmlparser_parse(XMLParserObject* self, PyObject* args)
2366{
2367 /* (internal) parse until end of input stream */
2368
2369 PyObject* reader;
2370 PyObject* buffer;
2371 PyObject* res;
2372
2373 PyObject* fileobj;
2374 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2375 return NULL;
2376
2377 reader = PyObject_GetAttrString(fileobj, "read");
2378 if (!reader)
2379 return NULL;
2380
2381 /* read from open file object */
2382 for (;;) {
2383
2384 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2385
2386 if (!buffer) {
2387 /* read failed (e.g. due to KeyboardInterrupt) */
2388 Py_DECREF(reader);
2389 return NULL;
2390 }
2391
Christian Heimes72b710a2008-05-26 13:28:38 +00002392 if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002393 Py_DECREF(buffer);
2394 break;
2395 }
2396
2397 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002398 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002399 );
2400
2401 Py_DECREF(buffer);
2402
2403 if (!res) {
2404 Py_DECREF(reader);
2405 return NULL;
2406 }
2407 Py_DECREF(res);
2408
2409 }
2410
2411 Py_DECREF(reader);
2412
2413 res = expat_parse(self, "", 0, 1);
2414
2415 if (res && TreeBuilder_CheckExact(self->target)) {
2416 Py_DECREF(res);
2417 return treebuilder_done((TreeBuilderObject*) self->target);
2418 }
2419
2420 return res;
2421}
2422
2423static PyObject*
2424xmlparser_setevents(XMLParserObject* self, PyObject* args)
2425{
2426 /* activate element event reporting */
2427
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002428 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002429 TreeBuilderObject* target;
2430
2431 PyObject* events; /* event collector */
2432 PyObject* event_set = Py_None;
2433 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2434 &event_set))
2435 return NULL;
2436
2437 if (!TreeBuilder_CheckExact(self->target)) {
2438 PyErr_SetString(
2439 PyExc_TypeError,
2440 "event handling only supported for cElementTree.Treebuilder "
2441 "targets"
2442 );
2443 return NULL;
2444 }
2445
2446 target = (TreeBuilderObject*) self->target;
2447
2448 Py_INCREF(events);
2449 Py_XDECREF(target->events);
2450 target->events = events;
2451
2452 /* clear out existing events */
2453 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2454 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2455 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2456 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2457
2458 if (event_set == Py_None) {
2459 /* default is "end" only */
Christian Heimes72b710a2008-05-26 13:28:38 +00002460 target->end_event_obj = PyBytes_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002461 Py_RETURN_NONE;
2462 }
2463
2464 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2465 goto error;
2466
2467 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2468 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2469 char* event;
Christian Heimes72b710a2008-05-26 13:28:38 +00002470 if (!PyBytes_Check(item))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002471 goto error;
Christian Heimes72b710a2008-05-26 13:28:38 +00002472 event = PyBytes_AS_STRING(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002473 if (strcmp(event, "start") == 0) {
2474 Py_INCREF(item);
2475 target->start_event_obj = item;
2476 } else if (strcmp(event, "end") == 0) {
2477 Py_INCREF(item);
2478 Py_XDECREF(target->end_event_obj);
2479 target->end_event_obj = item;
2480 } else if (strcmp(event, "start-ns") == 0) {
2481 Py_INCREF(item);
2482 Py_XDECREF(target->start_ns_event_obj);
2483 target->start_ns_event_obj = item;
2484 EXPAT(SetNamespaceDeclHandler)(
2485 self->parser,
2486 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2487 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2488 );
2489 } else if (strcmp(event, "end-ns") == 0) {
2490 Py_INCREF(item);
2491 Py_XDECREF(target->end_ns_event_obj);
2492 target->end_ns_event_obj = item;
2493 EXPAT(SetNamespaceDeclHandler)(
2494 self->parser,
2495 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2496 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2497 );
2498 } else {
2499 PyErr_Format(
2500 PyExc_ValueError,
2501 "unknown event '%s'", event
2502 );
2503 return NULL;
2504 }
2505 }
2506
2507 Py_RETURN_NONE;
2508
2509 error:
2510 PyErr_SetString(
2511 PyExc_TypeError,
2512 "invalid event tuple"
2513 );
2514 return NULL;
2515}
2516
/* Methods of XMLParser objects (installed through the tp_methods slot of
   XMLParser_Type).  The underscore-prefixed entries are the ones the
   bootstrap code in the module init function calls on the parser. */
static PyMethodDef xmlparser_methods[] = {
    {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
    {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
    {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
    {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
    {NULL, NULL} /* sentinel */
};
2524
2525static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002526xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002527{
2528 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002529 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002530
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002531 if (PyUnicode_Check(nameobj))
2532 name = PyUnicode_AsString(nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002533
2534 PyErr_Clear();
2535
2536 if (strcmp(name, "entity") == 0)
2537 res = self->entity;
2538 else if (strcmp(name, "target") == 0)
2539 res = self->target;
2540 else if (strcmp(name, "version") == 0) {
2541 char buffer[100];
2542 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2543 XML_MINOR_VERSION, XML_MICRO_VERSION);
Christian Heimes72b710a2008-05-26 13:28:38 +00002544 return PyBytes_FromString(buffer);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002545 } else {
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002546 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002547 }
2548
2549 Py_INCREF(res);
2550 return res;
2551}
2552
/* Type object for XMLParser instances.  Only deallocation, attribute
   lookup and the method table are customised; every other slot is left
   at zero.  The initializer is positional, so the order of the entries
   must follow the PyTypeObject layout. */
static PyTypeObject XMLParser_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "XMLParser", sizeof(XMLParserObject), 0,
    /* methods */
    (destructor)xmlparser_dealloc,      /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    (getattrofunc)xmlparser_getattro,   /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    xmlparser_methods,                  /* tp_methods */
    0,                                  /* tp_members */
};
2583
2584#endif
2585
2586/* ==================================================================== */
2587/* python module interface */
2588
/* Module-level functions.  The parser factories are only present in
   builds with USE_EXPAT defined; "XMLTreeBuilder" maps to the same C
   function as "XMLParser". */
static PyMethodDef _functions[] = {
    {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
    {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
    {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
#if defined(USE_EXPAT)
    {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
    {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
#endif
    {NULL, NULL} /* sentinel */
};
2599
Martin v. Löwis1a214512008-06-11 05:26:20 +00002600
/* Module definition handed to PyModule_Create() in the init function.
   The initializer is positional; the string is the module name and
   _functions is the module's function table. */
static struct PyModuleDef _elementtreemodule = {
    PyModuleDef_HEAD_INIT,
    "_elementtree",
    NULL,
    -1,
    _functions,
    NULL,
    NULL,
    NULL,
    NULL
};
2612
Neal Norwitzf6657e62006-12-28 04:47:50 +00002613PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002614PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002615{
2616 PyObject* m;
2617 PyObject* g;
2618 char* bootstrap;
2619#if defined(USE_PYEXPAT_CAPI)
2620 struct PyExpat_CAPI* capi;
2621#endif
2622
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002623 /* Initialize object types */
2624 if (PyType_Ready(&TreeBuilder_Type) < 0)
2625 return NULL;
2626 if (PyType_Ready(&Element_Type) < 0)
2627 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002628#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002629 if (PyType_Ready(&XMLParser_Type) < 0)
2630 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002631#endif
2632
Martin v. Löwis1a214512008-06-11 05:26:20 +00002633 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002634 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002635 return NULL;
2636
2637 /* The code below requires that the module gets already added
2638 to sys.modules. */
2639 PyDict_SetItemString(PyImport_GetModuleDict(),
2640 _elementtreemodule.m_name,
2641 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002642
2643 /* python glue code */
2644
2645 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002646 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002647 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002648
2649 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2650
2651 bootstrap = (
2652
2653#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2654 "from __future__ import generators\n" /* enable yield under 2.2 */
2655#endif
2656
2657 "from copy import copy, deepcopy\n"
2658
2659 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002660 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002661 "except ImportError:\n"
2662 " import ElementTree\n"
2663 "ET = ElementTree\n"
2664 "del ElementTree\n"
2665
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002666 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002667
2668 "try:\n" /* check if copy works as is */
2669 " copy(cElementTree.Element('x'))\n"
2670 "except:\n"
2671 " def copyelement(elem):\n"
2672 " return elem\n"
2673
2674 "def Comment(text=None):\n" /* public */
2675 " element = cElementTree.Element(ET.Comment)\n"
2676 " element.text = text\n"
2677 " return element\n"
2678 "cElementTree.Comment = Comment\n"
2679
2680 "class ElementTree(ET.ElementTree):\n" /* public */
2681 " def parse(self, source, parser=None):\n"
2682 " if not hasattr(source, 'read'):\n"
2683 " source = open(source, 'rb')\n"
2684 " if parser is not None:\n"
2685 " while 1:\n"
2686 " data = source.read(65536)\n"
2687 " if not data:\n"
2688 " break\n"
2689 " parser.feed(data)\n"
2690 " self._root = parser.close()\n"
2691 " else:\n"
2692 " parser = cElementTree.XMLParser()\n"
2693 " self._root = parser._parse(source)\n"
2694 " return self._root\n"
2695 "cElementTree.ElementTree = ElementTree\n"
2696
2697 "def getiterator(node, tag=None):\n" /* helper */
2698 " if tag == '*':\n"
2699 " tag = None\n"
2700#if (PY_VERSION_HEX < 0x02020000)
2701 " nodes = []\n" /* 2.1 doesn't have yield */
2702 " if tag is None or node.tag == tag:\n"
2703 " nodes.append(node)\n"
2704 " for node in node:\n"
2705 " nodes.extend(getiterator(node, tag))\n"
2706 " return nodes\n"
2707#else
2708 " if tag is None or node.tag == tag:\n"
2709 " yield node\n"
2710 " for node in node:\n"
2711 " for node in getiterator(node, tag):\n"
2712 " yield node\n"
2713#endif
2714
2715 "def parse(source, parser=None):\n" /* public */
2716 " tree = ElementTree()\n"
2717 " tree.parse(source, parser)\n"
2718 " return tree\n"
2719 "cElementTree.parse = parse\n"
2720
2721#if (PY_VERSION_HEX < 0x02020000)
2722 "if hasattr(ET, 'iterparse'):\n"
2723 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2724#else
2725 "class iterparse(object):\n"
2726 " root = None\n"
2727 " def __init__(self, file, events=None):\n"
2728 " if not hasattr(file, 'read'):\n"
2729 " file = open(file, 'rb')\n"
2730 " self._file = file\n"
2731 " self._events = events\n"
2732 " def __iter__(self):\n"
2733 " events = []\n"
2734 " b = cElementTree.TreeBuilder()\n"
2735 " p = cElementTree.XMLParser(b)\n"
2736 " p._setevents(events, self._events)\n"
2737 " while 1:\n"
2738 " data = self._file.read(16384)\n"
2739 " if not data:\n"
2740 " break\n"
2741 " p.feed(data)\n"
2742 " for event in events:\n"
2743 " yield event\n"
2744 " del events[:]\n"
2745 " root = p.close()\n"
2746 " for event in events:\n"
2747 " yield event\n"
2748 " self.root = root\n"
2749 "cElementTree.iterparse = iterparse\n"
2750#endif
2751
2752 "def PI(target, text=None):\n" /* public */
2753 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2754 " element.text = target\n"
2755 " if text:\n"
2756 " element.text = element.text + ' ' + text\n"
2757 " return element\n"
2758
2759 " elem = cElementTree.Element(ET.PI)\n"
2760 " elem.text = text\n"
2761 " return elem\n"
2762 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2763
2764 "def XML(text):\n" /* public */
2765 " parser = cElementTree.XMLParser()\n"
2766 " parser.feed(text)\n"
2767 " return parser.close()\n"
2768 "cElementTree.XML = cElementTree.fromstring = XML\n"
2769
2770 "def XMLID(text):\n" /* public */
2771 " tree = XML(text)\n"
2772 " ids = {}\n"
2773 " for elem in tree.getiterator():\n"
2774 " id = elem.get('id')\n"
2775 " if id:\n"
2776 " ids[id] = elem\n"
2777 " return tree, ids\n"
2778 "cElementTree.XMLID = XMLID\n"
2779
2780 "cElementTree.dump = ET.dump\n"
2781 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2782 "cElementTree.iselement = ET.iselement\n"
2783 "cElementTree.QName = ET.QName\n"
2784 "cElementTree.tostring = ET.tostring\n"
2785 "cElementTree.VERSION = '" VERSION "'\n"
2786 "cElementTree.__version__ = '" VERSION "'\n"
2787 "cElementTree.XMLParserError = SyntaxError\n"
2788
2789 );
2790
2791 PyRun_String(bootstrap, Py_file_input, g, NULL);
2792
2793 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2794
2795 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2796 if (elementtree_copyelement_obj) {
2797 /* reduce hack needed; enable reduce method */
2798 PyMethodDef* mp;
2799 for (mp = element_methods; mp->ml_name; mp++)
2800 if (mp->ml_meth == (PyCFunction) element_reduce) {
2801 mp->ml_name = "__reduce__";
2802 break;
2803 }
2804 } else
2805 PyErr_Clear();
2806 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2807 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2808
2809#if defined(USE_PYEXPAT_CAPI)
2810 /* link against pyexpat, if possible */
2811 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2812 if (capi &&
2813 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2814 capi->size <= sizeof(*expat_capi) &&
2815 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2816 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2817 capi->MICRO_VERSION == XML_MICRO_VERSION)
2818 expat_capi = capi;
2819 else
2820 expat_capi = NULL;
2821#endif
Martin v. Löwis1a214512008-06-11 05:26:20 +00002822 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002823
2824}