blob: ae098932860320007bac7f7252b466f991f4b9cf [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000036 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000038 *
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000039 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000041 *
42 * info@pythonware.com
43 * http://www.pythonware.com
44 */
45
Fredrik Lundh6d52b552005-12-16 22:06:43 +000046/* Licensed to PSF under a Contributor Agreement. */
47/* See http://www.python.org/2.4/license for licensing details. */
48
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000049#include "Python.h"
50
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000051#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
53/* -------------------------------------------------------------------- */
54/* configuration */
55
56/* Leave defined to include the expat-based XMLParser type */
57#define USE_EXPAT
58
59/* Define to route all expat calls via pyexpat's embedded expat library */
60/* #define USE_PYEXPAT_CAPI */
61
62/* An element can hold this many children without extra memory
63 allocations. */
64#define STATIC_CHILDREN 4
65
66/* For best performance, choose a value so that 80-90% of all nodes
67 have no more than the given number of children. Set this to zero
68 to minimize the size of the element structure itself (this only
69 helps if you have lots of leaf nodes with attributes). */
70
71/* Also note that pymalloc always allocates blocks in multiples of
72 eight bytes. For the current version of cElementTree, this means
73 that the number of children should be an even number, at least on
74 32-bit platforms. */
75
76/* -------------------------------------------------------------------- */
77
78#if 0
79static int memory = 0;
80#define ALLOC(size, comment)\
81do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
82#define RELEASE(size, comment)\
83do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
84#else
85#define ALLOC(size, comment)
86#define RELEASE(size, comment)
87#endif
88
89/* compiler tweaks */
90#if defined(_MSC_VER)
91#define LOCAL(type) static __inline type __fastcall
92#else
93#define LOCAL(type) static type
94#endif
95
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000096/* macros used to store 'join' flags in string object pointers. note
97 that all use of text and tail as object pointers must be wrapped in
98 JOIN_OBJ. see comments in the ElementObject definition for more
99 info. */
100#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
101#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
102#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
103
104/* glue functions (see the init function for details) */
105static PyObject* elementtree_copyelement_obj;
106static PyObject* elementtree_deepcopy_obj;
107static PyObject* elementtree_getiterator_obj;
108static PyObject* elementpath_obj;
109
110/* helpers */
111
112LOCAL(PyObject*)
113deepcopy(PyObject* object, PyObject* memo)
114{
115 /* do a deep copy of the given object */
116
117 PyObject* args;
118 PyObject* result;
119
120 if (!elementtree_deepcopy_obj) {
121 PyErr_SetString(
122 PyExc_RuntimeError,
123 "deepcopy helper not found"
124 );
125 return NULL;
126 }
127
128 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000129 if (!args)
130 return NULL;
131
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
133 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
134
135 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
136
137 Py_DECREF(args);
138
139 return result;
140}
141
142LOCAL(PyObject*)
143list_join(PyObject* list)
144{
145 /* join list elements (destroying the list in the process) */
146
147 PyObject* joiner;
148 PyObject* function;
149 PyObject* args;
150 PyObject* result;
151
152 switch (PyList_GET_SIZE(list)) {
153 case 0:
154 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000155 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000156 case 1:
157 result = PyList_GET_ITEM(list, 0);
158 Py_INCREF(result);
159 Py_DECREF(list);
160 return result;
161 }
162
163 /* two or more elements: slice out a suitable separator from the
164 first member, and use that to join the entire list */
165
166 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
167 if (!joiner)
168 return NULL;
169
170 function = PyObject_GetAttrString(joiner, "join");
171 if (!function) {
172 Py_DECREF(joiner);
173 return NULL;
174 }
175
176 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000177 if (!args)
178 return NULL;
179
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000180 PyTuple_SET_ITEM(args, 0, list);
181
182 result = PyObject_CallObject(function, args);
183
184 Py_DECREF(args); /* also removes list */
185 Py_DECREF(function);
186 Py_DECREF(joiner);
187
188 return result;
189}
190
191#if (PY_VERSION_HEX < 0x02020000)
192LOCAL(int)
193PyDict_Update(PyObject* dict, PyObject* other)
194{
195 /* PyDict_Update emulation for 2.1 and earlier */
196
197 PyObject* res;
198
199 res = PyObject_CallMethod(dict, "update", "O", other);
200 if (!res)
201 return -1;
202
203 Py_DECREF(res);
204 return 0;
205}
206#endif
207
208/* -------------------------------------------------------------------- */
209/* the element type */
210
211typedef struct {
212
213 /* attributes (a dictionary object), or None if no attributes */
214 PyObject* attrib;
215
216 /* child elements */
217 int length; /* actual number of items */
218 int allocated; /* allocated items */
219
220 /* this either points to _children or to a malloced buffer */
221 PyObject* *children;
222
223 PyObject* _children[STATIC_CHILDREN];
224
225} ElementObjectExtra;
226
227typedef struct {
228 PyObject_HEAD
229
230 /* element tag (a string). */
231 PyObject* tag;
232
233 /* text before first child. note that this is a tagged pointer;
234 use JOIN_OBJ to get the object pointer. the join flag is used
235 to distinguish lists created by the tree builder from lists
236 assigned to the attribute by application code; the former
237 should be joined before being returned to the user, the latter
238 should be left intact. */
239 PyObject* text;
240
241 /* text after this element, in parent. note that this is a tagged
242 pointer; use JOIN_OBJ to get the object pointer. */
243 PyObject* tail;
244
245 ElementObjectExtra* extra;
246
247} ElementObject;
248
Neal Norwitz227b5332006-03-22 09:28:35 +0000249static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000250
Christian Heimes90aa7642007-12-19 02:45:37 +0000251#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000252
253/* -------------------------------------------------------------------- */
254/* element constructor and destructor */
255
256LOCAL(int)
257element_new_extra(ElementObject* self, PyObject* attrib)
258{
259 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
260 if (!self->extra)
261 return -1;
262
263 if (!attrib)
264 attrib = Py_None;
265
266 Py_INCREF(attrib);
267 self->extra->attrib = attrib;
268
269 self->extra->length = 0;
270 self->extra->allocated = STATIC_CHILDREN;
271 self->extra->children = self->extra->_children;
272
273 return 0;
274}
275
276LOCAL(void)
277element_dealloc_extra(ElementObject* self)
278{
279 int i;
280
281 Py_DECREF(self->extra->attrib);
282
283 for (i = 0; i < self->extra->length; i++)
284 Py_DECREF(self->extra->children[i]);
285
286 if (self->extra->children != self->extra->_children)
287 PyObject_Free(self->extra->children);
288
289 PyObject_Free(self->extra);
290}
291
292LOCAL(PyObject*)
293element_new(PyObject* tag, PyObject* attrib)
294{
295 ElementObject* self;
296
297 self = PyObject_New(ElementObject, &Element_Type);
298 if (self == NULL)
299 return NULL;
300
301 /* use None for empty dictionaries */
302 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
303 attrib = Py_None;
304
305 self->extra = NULL;
306
307 if (attrib != Py_None) {
308
Thomas Wouters477c8d52006-05-27 19:21:47 +0000309 if (element_new_extra(self, attrib) < 0) {
310 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000312 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000313
314 self->extra->length = 0;
315 self->extra->allocated = STATIC_CHILDREN;
316 self->extra->children = self->extra->_children;
317
318 }
319
320 Py_INCREF(tag);
321 self->tag = tag;
322
323 Py_INCREF(Py_None);
324 self->text = Py_None;
325
326 Py_INCREF(Py_None);
327 self->tail = Py_None;
328
329 ALLOC(sizeof(ElementObject), "create element");
330
331 return (PyObject*) self;
332}
333
334LOCAL(int)
335element_resize(ElementObject* self, int extra)
336{
337 int size;
338 PyObject* *children;
339
340 /* make sure self->children can hold the given number of extra
341 elements. set an exception and return -1 if allocation failed */
342
343 if (!self->extra)
344 element_new_extra(self, NULL);
345
346 size = self->extra->length + extra;
347
348 if (size > self->extra->allocated) {
349 /* use Python 2.4's list growth strategy */
350 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000351 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
352 * which needs at least 4 bytes.
353 * Although it's a false alarm always assume at least one child to
354 * be safe.
355 */
356 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000357 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000358 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
359 * "children", which needs at least 4 bytes. Although it's a
360 * false alarm always assume at least one child to be safe.
361 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000362 children = PyObject_Realloc(self->extra->children,
363 size * sizeof(PyObject*));
364 if (!children)
365 goto nomemory;
366 } else {
367 children = PyObject_Malloc(size * sizeof(PyObject*));
368 if (!children)
369 goto nomemory;
370 /* copy existing children from static area to malloc buffer */
371 memcpy(children, self->extra->children,
372 self->extra->length * sizeof(PyObject*));
373 }
374 self->extra->children = children;
375 self->extra->allocated = size;
376 }
377
378 return 0;
379
380 nomemory:
381 PyErr_NoMemory();
382 return -1;
383}
384
385LOCAL(int)
386element_add_subelement(ElementObject* self, PyObject* element)
387{
388 /* add a child element to a parent */
389
390 if (element_resize(self, 1) < 0)
391 return -1;
392
393 Py_INCREF(element);
394 self->extra->children[self->extra->length] = element;
395
396 self->extra->length++;
397
398 return 0;
399}
400
401LOCAL(PyObject*)
402element_get_attrib(ElementObject* self)
403{
404 /* return borrowed reference to attrib dictionary */
405 /* note: this function assumes that the extra section exists */
406
407 PyObject* res = self->extra->attrib;
408
409 if (res == Py_None) {
410 /* create missing dictionary */
411 res = PyDict_New();
412 if (!res)
413 return NULL;
414 self->extra->attrib = res;
415 }
416
417 return res;
418}
419
420LOCAL(PyObject*)
421element_get_text(ElementObject* self)
422{
423 /* return borrowed reference to text attribute */
424
425 PyObject* res = self->text;
426
427 if (JOIN_GET(res)) {
428 res = JOIN_OBJ(res);
429 if (PyList_CheckExact(res)) {
430 res = list_join(res);
431 if (!res)
432 return NULL;
433 self->text = res;
434 }
435 }
436
437 return res;
438}
439
440LOCAL(PyObject*)
441element_get_tail(ElementObject* self)
442{
443 /* return borrowed reference to text attribute */
444
445 PyObject* res = self->tail;
446
447 if (JOIN_GET(res)) {
448 res = JOIN_OBJ(res);
449 if (PyList_CheckExact(res)) {
450 res = list_join(res);
451 if (!res)
452 return NULL;
453 self->tail = res;
454 }
455 }
456
457 return res;
458}
459
460static PyObject*
461element(PyObject* self, PyObject* args, PyObject* kw)
462{
463 PyObject* elem;
464
465 PyObject* tag;
466 PyObject* attrib = NULL;
467 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
468 &PyDict_Type, &attrib))
469 return NULL;
470
471 if (attrib || kw) {
472 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
473 if (!attrib)
474 return NULL;
475 if (kw)
476 PyDict_Update(attrib, kw);
477 } else {
478 Py_INCREF(Py_None);
479 attrib = Py_None;
480 }
481
482 elem = element_new(tag, attrib);
483
484 Py_DECREF(attrib);
485
486 return elem;
487}
488
489static PyObject*
490subelement(PyObject* self, PyObject* args, PyObject* kw)
491{
492 PyObject* elem;
493
494 ElementObject* parent;
495 PyObject* tag;
496 PyObject* attrib = NULL;
497 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
498 &Element_Type, &parent, &tag,
499 &PyDict_Type, &attrib))
500 return NULL;
501
502 if (attrib || kw) {
503 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
504 if (!attrib)
505 return NULL;
506 if (kw)
507 PyDict_Update(attrib, kw);
508 } else {
509 Py_INCREF(Py_None);
510 attrib = Py_None;
511 }
512
513 elem = element_new(tag, attrib);
514
515 Py_DECREF(attrib);
516
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000517 if (element_add_subelement(parent, elem) < 0) {
518 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000519 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000520 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000521
522 return elem;
523}
524
525static void
526element_dealloc(ElementObject* self)
527{
528 if (self->extra)
529 element_dealloc_extra(self);
530
531 /* discard attributes */
532 Py_DECREF(self->tag);
533 Py_DECREF(JOIN_OBJ(self->text));
534 Py_DECREF(JOIN_OBJ(self->tail));
535
536 RELEASE(sizeof(ElementObject), "destroy element");
537
538 PyObject_Del(self);
539}
540
541/* -------------------------------------------------------------------- */
542/* methods (in alphabetical order) */
543
544static PyObject*
545element_append(ElementObject* self, PyObject* args)
546{
547 PyObject* element;
548 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
549 return NULL;
550
551 if (element_add_subelement(self, element) < 0)
552 return NULL;
553
554 Py_RETURN_NONE;
555}
556
557static PyObject*
558element_clear(ElementObject* self, PyObject* args)
559{
560 if (!PyArg_ParseTuple(args, ":clear"))
561 return NULL;
562
563 if (self->extra) {
564 element_dealloc_extra(self);
565 self->extra = NULL;
566 }
567
568 Py_INCREF(Py_None);
569 Py_DECREF(JOIN_OBJ(self->text));
570 self->text = Py_None;
571
572 Py_INCREF(Py_None);
573 Py_DECREF(JOIN_OBJ(self->tail));
574 self->tail = Py_None;
575
576 Py_RETURN_NONE;
577}
578
579static PyObject*
580element_copy(ElementObject* self, PyObject* args)
581{
582 int i;
583 ElementObject* element;
584
585 if (!PyArg_ParseTuple(args, ":__copy__"))
586 return NULL;
587
588 element = (ElementObject*) element_new(
589 self->tag, (self->extra) ? self->extra->attrib : Py_None
590 );
591 if (!element)
592 return NULL;
593
594 Py_DECREF(JOIN_OBJ(element->text));
595 element->text = self->text;
596 Py_INCREF(JOIN_OBJ(element->text));
597
598 Py_DECREF(JOIN_OBJ(element->tail));
599 element->tail = self->tail;
600 Py_INCREF(JOIN_OBJ(element->tail));
601
602 if (self->extra) {
603
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000604 if (element_resize(element, self->extra->length) < 0) {
605 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000606 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000607 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000608
609 for (i = 0; i < self->extra->length; i++) {
610 Py_INCREF(self->extra->children[i]);
611 element->extra->children[i] = self->extra->children[i];
612 }
613
614 element->extra->length = self->extra->length;
615
616 }
617
618 return (PyObject*) element;
619}
620
621static PyObject*
622element_deepcopy(ElementObject* self, PyObject* args)
623{
624 int i;
625 ElementObject* element;
626 PyObject* tag;
627 PyObject* attrib;
628 PyObject* text;
629 PyObject* tail;
630 PyObject* id;
631
632 PyObject* memo;
633 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
634 return NULL;
635
636 tag = deepcopy(self->tag, memo);
637 if (!tag)
638 return NULL;
639
640 if (self->extra) {
641 attrib = deepcopy(self->extra->attrib, memo);
642 if (!attrib) {
643 Py_DECREF(tag);
644 return NULL;
645 }
646 } else {
647 Py_INCREF(Py_None);
648 attrib = Py_None;
649 }
650
651 element = (ElementObject*) element_new(tag, attrib);
652
653 Py_DECREF(tag);
654 Py_DECREF(attrib);
655
656 if (!element)
657 return NULL;
658
659 text = deepcopy(JOIN_OBJ(self->text), memo);
660 if (!text)
661 goto error;
662 Py_DECREF(element->text);
663 element->text = JOIN_SET(text, JOIN_GET(self->text));
664
665 tail = deepcopy(JOIN_OBJ(self->tail), memo);
666 if (!tail)
667 goto error;
668 Py_DECREF(element->tail);
669 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
670
671 if (self->extra) {
672
673 if (element_resize(element, self->extra->length) < 0)
674 goto error;
675
676 for (i = 0; i < self->extra->length; i++) {
677 PyObject* child = deepcopy(self->extra->children[i], memo);
678 if (!child) {
679 element->extra->length = i;
680 goto error;
681 }
682 element->extra->children[i] = child;
683 }
684
685 element->extra->length = self->extra->length;
686
687 }
688
689 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000690 id = PyLong_FromLong((Py_uintptr_t) self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000691
692 i = PyDict_SetItem(memo, id, (PyObject*) element);
693
694 Py_DECREF(id);
695
696 if (i < 0)
697 goto error;
698
699 return (PyObject*) element;
700
701 error:
702 Py_DECREF(element);
703 return NULL;
704}
705
706LOCAL(int)
707checkpath(PyObject* tag)
708{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000709 Py_ssize_t i;
710 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000711
712 /* check if a tag contains an xpath character */
713
714#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
715
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716 if (PyUnicode_Check(tag)) {
717 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
718 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
719 if (p[i] == '{')
720 check = 0;
721 else if (p[i] == '}')
722 check = 1;
723 else if (check && PATHCHAR(p[i]))
724 return 1;
725 }
726 return 0;
727 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000728 if (PyBytes_Check(tag)) {
729 char *p = PyBytes_AS_STRING(tag);
730 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000731 if (p[i] == '{')
732 check = 0;
733 else if (p[i] == '}')
734 check = 1;
735 else if (check && PATHCHAR(p[i]))
736 return 1;
737 }
738 return 0;
739 }
740
741 return 1; /* unknown type; might be path expression */
742}
743
744static PyObject*
745element_find(ElementObject* self, PyObject* args)
746{
747 int i;
748
749 PyObject* tag;
750 if (!PyArg_ParseTuple(args, "O:find", &tag))
751 return NULL;
752
753 if (checkpath(tag))
754 return PyObject_CallMethod(
755 elementpath_obj, "find", "OO", self, tag
756 );
757
758 if (!self->extra)
759 Py_RETURN_NONE;
760
761 for (i = 0; i < self->extra->length; i++) {
762 PyObject* item = self->extra->children[i];
763 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000764 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000765 Py_INCREF(item);
766 return item;
767 }
768 }
769
770 Py_RETURN_NONE;
771}
772
773static PyObject*
774element_findtext(ElementObject* self, PyObject* args)
775{
776 int i;
777
778 PyObject* tag;
779 PyObject* default_value = Py_None;
780 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
781 return NULL;
782
783 if (checkpath(tag))
784 return PyObject_CallMethod(
785 elementpath_obj, "findtext", "OOO", self, tag, default_value
786 );
787
788 if (!self->extra) {
789 Py_INCREF(default_value);
790 return default_value;
791 }
792
793 for (i = 0; i < self->extra->length; i++) {
794 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000795 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
796
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000797 PyObject* text = element_get_text(item);
798 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000799 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000800 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000801 return text;
802 }
803 }
804
805 Py_INCREF(default_value);
806 return default_value;
807}
808
809static PyObject*
810element_findall(ElementObject* self, PyObject* args)
811{
812 int i;
813 PyObject* out;
814
815 PyObject* tag;
816 if (!PyArg_ParseTuple(args, "O:findall", &tag))
817 return NULL;
818
819 if (checkpath(tag))
820 return PyObject_CallMethod(
821 elementpath_obj, "findall", "OO", self, tag
822 );
823
824 out = PyList_New(0);
825 if (!out)
826 return NULL;
827
828 if (!self->extra)
829 return out;
830
831 for (i = 0; i < self->extra->length; i++) {
832 PyObject* item = self->extra->children[i];
833 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000834 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000835 if (PyList_Append(out, item) < 0) {
836 Py_DECREF(out);
837 return NULL;
838 }
839 }
840 }
841
842 return out;
843}
844
845static PyObject*
846element_get(ElementObject* self, PyObject* args)
847{
848 PyObject* value;
849
850 PyObject* key;
851 PyObject* default_value = Py_None;
852 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
853 return NULL;
854
855 if (!self->extra || self->extra->attrib == Py_None)
856 value = default_value;
857 else {
858 value = PyDict_GetItem(self->extra->attrib, key);
859 if (!value)
860 value = default_value;
861 }
862
863 Py_INCREF(value);
864 return value;
865}
866
867static PyObject*
868element_getchildren(ElementObject* self, PyObject* args)
869{
870 int i;
871 PyObject* list;
872
873 if (!PyArg_ParseTuple(args, ":getchildren"))
874 return NULL;
875
876 if (!self->extra)
877 return PyList_New(0);
878
879 list = PyList_New(self->extra->length);
880 if (!list)
881 return NULL;
882
883 for (i = 0; i < self->extra->length; i++) {
884 PyObject* item = self->extra->children[i];
885 Py_INCREF(item);
886 PyList_SET_ITEM(list, i, item);
887 }
888
889 return list;
890}
891
892static PyObject*
893element_getiterator(ElementObject* self, PyObject* args)
894{
895 PyObject* result;
896
897 PyObject* tag = Py_None;
898 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
899 return NULL;
900
901 if (!elementtree_getiterator_obj) {
902 PyErr_SetString(
903 PyExc_RuntimeError,
904 "getiterator helper not found"
905 );
906 return NULL;
907 }
908
909 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000910 if (!args)
911 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000912
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000913 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
914 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
915
916 result = PyObject_CallObject(elementtree_getiterator_obj, args);
917
918 Py_DECREF(args);
919
920 return result;
921}
922
923static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000924element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000925{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000926 ElementObject* self = (ElementObject*) self_;
927
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000928 if (!self->extra || index < 0 || index >= self->extra->length) {
929 PyErr_SetString(
930 PyExc_IndexError,
931 "child index out of range"
932 );
933 return NULL;
934 }
935
936 Py_INCREF(self->extra->children[index]);
937 return self->extra->children[index];
938}
939
940static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000941element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000942{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000943 ElementObject* self = (ElementObject*) self_;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000944 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000945 PyObject* list;
946
947 if (!self->extra)
948 return PyList_New(0);
949
950 /* standard clamping */
951 if (start < 0)
952 start = 0;
953 if (end < 0)
954 end = 0;
955 if (end > self->extra->length)
956 end = self->extra->length;
957 if (start > end)
958 start = end;
959
960 list = PyList_New(end - start);
961 if (!list)
962 return NULL;
963
964 for (i = start; i < end; i++) {
965 PyObject* item = self->extra->children[i];
966 Py_INCREF(item);
967 PyList_SET_ITEM(list, i - start, item);
968 }
969
970 return list;
971}
972
973static PyObject*
974element_insert(ElementObject* self, PyObject* args)
975{
976 int i;
977
978 int index;
979 PyObject* element;
980 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
981 &Element_Type, &element))
982 return NULL;
983
984 if (!self->extra)
985 element_new_extra(self, NULL);
986
987 if (index < 0)
988 index = 0;
989 if (index > self->extra->length)
990 index = self->extra->length;
991
992 if (element_resize(self, 1) < 0)
993 return NULL;
994
995 for (i = self->extra->length; i > index; i--)
996 self->extra->children[i] = self->extra->children[i-1];
997
998 Py_INCREF(element);
999 self->extra->children[index] = element;
1000
1001 self->extra->length++;
1002
1003 Py_RETURN_NONE;
1004}
1005
1006static PyObject*
1007element_items(ElementObject* self, PyObject* args)
1008{
1009 if (!PyArg_ParseTuple(args, ":items"))
1010 return NULL;
1011
1012 if (!self->extra || self->extra->attrib == Py_None)
1013 return PyList_New(0);
1014
1015 return PyDict_Items(self->extra->attrib);
1016}
1017
1018static PyObject*
1019element_keys(ElementObject* self, PyObject* args)
1020{
1021 if (!PyArg_ParseTuple(args, ":keys"))
1022 return NULL;
1023
1024 if (!self->extra || self->extra->attrib == Py_None)
1025 return PyList_New(0);
1026
1027 return PyDict_Keys(self->extra->attrib);
1028}
1029
Martin v. Löwis18e16552006-02-15 17:27:45 +00001030static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001031element_length(ElementObject* self)
1032{
1033 if (!self->extra)
1034 return 0;
1035
1036 return self->extra->length;
1037}
1038
1039static PyObject*
1040element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1041{
1042 PyObject* elem;
1043
1044 PyObject* tag;
1045 PyObject* attrib;
1046 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1047 return NULL;
1048
1049 attrib = PyDict_Copy(attrib);
1050 if (!attrib)
1051 return NULL;
1052
1053 elem = element_new(tag, attrib);
1054
1055 Py_DECREF(attrib);
1056
1057 return elem;
1058}
1059
1060static PyObject*
1061element_reduce(ElementObject* self, PyObject* args)
1062{
1063 if (!PyArg_ParseTuple(args, ":__reduce__"))
1064 return NULL;
1065
1066 /* Hack alert: This method is used to work around a __copy__
1067 problem on certain 2.3 and 2.4 versions. To save time and
1068 simplify the code, we create the copy in here, and use a dummy
1069 copyelement helper to trick the copy module into doing the
1070 right thing. */
1071
1072 if (!elementtree_copyelement_obj) {
1073 PyErr_SetString(
1074 PyExc_RuntimeError,
1075 "copyelement helper not found"
1076 );
1077 return NULL;
1078 }
1079
1080 return Py_BuildValue(
1081 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1082 );
1083}
1084
1085static PyObject*
1086element_remove(ElementObject* self, PyObject* args)
1087{
1088 int i;
1089
1090 PyObject* element;
1091 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1092 return NULL;
1093
1094 if (!self->extra) {
1095 /* element has no children, so raise exception */
1096 PyErr_SetString(
1097 PyExc_ValueError,
1098 "list.remove(x): x not in list"
1099 );
1100 return NULL;
1101 }
1102
1103 for (i = 0; i < self->extra->length; i++) {
1104 if (self->extra->children[i] == element)
1105 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001106 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001107 break;
1108 }
1109
1110 if (i == self->extra->length) {
1111 /* element is not in children, so raise exception */
1112 PyErr_SetString(
1113 PyExc_ValueError,
1114 "list.remove(x): x not in list"
1115 );
1116 return NULL;
1117 }
1118
1119 Py_DECREF(self->extra->children[i]);
1120
1121 self->extra->length--;
1122
1123 for (; i < self->extra->length; i++)
1124 self->extra->children[i] = self->extra->children[i+1];
1125
1126 Py_RETURN_NONE;
1127}
1128
1129static PyObject*
1130element_repr(ElementObject* self)
1131{
Walter Dörwald7569dfe2007-05-19 21:49:49 +00001132 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001133}
1134
1135static PyObject*
1136element_set(ElementObject* self, PyObject* args)
1137{
1138 PyObject* attrib;
1139
1140 PyObject* key;
1141 PyObject* value;
1142 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1143 return NULL;
1144
1145 if (!self->extra)
1146 element_new_extra(self, NULL);
1147
1148 attrib = element_get_attrib(self);
1149 if (!attrib)
1150 return NULL;
1151
1152 if (PyDict_SetItem(attrib, key, value) < 0)
1153 return NULL;
1154
1155 Py_RETURN_NONE;
1156}
1157
1158static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001159element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001160{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001161 ElementObject* self = (ElementObject*) self_;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001162 Py_ssize_t i, new, old;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001163 PyObject* recycle = NULL;
1164
1165 if (!self->extra)
1166 element_new_extra(self, NULL);
1167
1168 /* standard clamping */
1169 if (start < 0)
1170 start = 0;
1171 if (end < 0)
1172 end = 0;
1173 if (end > self->extra->length)
1174 end = self->extra->length;
1175 if (start > end)
1176 start = end;
1177
1178 old = end - start;
1179
1180 if (item == NULL)
1181 new = 0;
1182 else if (PyList_CheckExact(item)) {
1183 new = PyList_GET_SIZE(item);
1184 } else {
1185 /* FIXME: support arbitrary sequences? */
1186 PyErr_Format(
1187 PyExc_TypeError,
Christian Heimes90aa7642007-12-19 02:45:37 +00001188 "expected list, not \"%.200s\"", Py_TYPE(item)->tp_name
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001189 );
1190 return -1;
1191 }
1192
1193 if (old > 0) {
1194 /* to avoid recursive calls to this method (via decref), move
1195 old items to the recycle bin here, and get rid of them when
1196 we're done modifying the element */
1197 recycle = PyList_New(old);
1198 for (i = 0; i < old; i++)
1199 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1200 }
1201
1202 if (new < old) {
1203 /* delete slice */
1204 for (i = end; i < self->extra->length; i++)
1205 self->extra->children[i + new - old] = self->extra->children[i];
1206 } else if (new > old) {
1207 /* insert slice */
1208 if (element_resize(self, new - old) < 0)
1209 return -1;
1210 for (i = self->extra->length-1; i >= end; i--)
1211 self->extra->children[i + new - old] = self->extra->children[i];
1212 }
1213
1214 /* replace the slice */
1215 for (i = 0; i < new; i++) {
1216 PyObject* element = PyList_GET_ITEM(item, i);
1217 Py_INCREF(element);
1218 self->extra->children[i + start] = element;
1219 }
1220
1221 self->extra->length += new - old;
1222
1223 /* discard the recycle bin, and everything in it */
1224 Py_XDECREF(recycle);
1225
1226 return 0;
1227}
1228
1229static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001230element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001231{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001232 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001233 int i;
1234 PyObject* old;
1235
1236 if (!self->extra || index < 0 || index >= self->extra->length) {
1237 PyErr_SetString(
1238 PyExc_IndexError,
1239 "child assignment index out of range");
1240 return -1;
1241 }
1242
1243 old = self->extra->children[index];
1244
1245 if (item) {
1246 Py_INCREF(item);
1247 self->extra->children[index] = item;
1248 } else {
1249 self->extra->length--;
1250 for (i = index; i < self->extra->length; i++)
1251 self->extra->children[i] = self->extra->children[i+1];
1252 }
1253
1254 Py_DECREF(old);
1255
1256 return 0;
1257}
1258
1259static PyMethodDef element_methods[] = {
1260
1261 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1262
1263 {"get", (PyCFunction) element_get, METH_VARARGS},
1264 {"set", (PyCFunction) element_set, METH_VARARGS},
1265
1266 {"find", (PyCFunction) element_find, METH_VARARGS},
1267 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1268 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1269
1270 {"append", (PyCFunction) element_append, METH_VARARGS},
1271 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1272 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1273
1274 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1275 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1276
1277 {"items", (PyCFunction) element_items, METH_VARARGS},
1278 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1279
1280 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1281
1282 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1283 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1284
1285 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1286 C objects correctly, so we have to fake it using a __reduce__-
1287 based hack (see the element_reduce implementation above for
1288 details). */
1289
1290 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1291 using a runtime test to figure out if we need to fake things
1292 or now (see the init code below). The following entry is
1293 enabled only if the hack is needed. */
1294
1295 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1296
1297 {NULL, NULL}
1298};
1299
1300static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001301element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001302{
1303 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001304 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001305
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001306 if (PyUnicode_Check(nameobj))
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001307 name = _PyUnicode_AsString(nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001308
1309 if (strcmp(name, "tag") == 0)
1310 res = self->tag;
1311 else if (strcmp(name, "text") == 0)
1312 res = element_get_text(self);
1313 else if (strcmp(name, "tail") == 0) {
1314 res = element_get_tail(self);
1315 } else if (strcmp(name, "attrib") == 0) {
1316 if (!self->extra)
1317 element_new_extra(self, NULL);
1318 res = element_get_attrib(self);
1319 } else {
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001320 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001321 }
1322
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001323 Py_XINCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001324 return res;
1325}
1326
1327static int
1328element_setattr(ElementObject* self, const char* name, PyObject* value)
1329{
1330 if (value == NULL) {
1331 PyErr_SetString(
1332 PyExc_AttributeError,
1333 "can't delete element attributes"
1334 );
1335 return -1;
1336 }
1337
1338 if (strcmp(name, "tag") == 0) {
1339 Py_DECREF(self->tag);
1340 self->tag = value;
1341 Py_INCREF(self->tag);
1342 } else if (strcmp(name, "text") == 0) {
1343 Py_DECREF(JOIN_OBJ(self->text));
1344 self->text = value;
1345 Py_INCREF(self->text);
1346 } else if (strcmp(name, "tail") == 0) {
1347 Py_DECREF(JOIN_OBJ(self->tail));
1348 self->tail = value;
1349 Py_INCREF(self->tail);
1350 } else if (strcmp(name, "attrib") == 0) {
1351 if (!self->extra)
1352 element_new_extra(self, NULL);
1353 Py_DECREF(self->extra->attrib);
1354 self->extra->attrib = value;
1355 Py_INCREF(self->extra->attrib);
1356 } else {
1357 PyErr_SetString(PyExc_AttributeError, name);
1358 return -1;
1359 }
1360
1361 return 0;
1362}
1363
1364static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001365 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001366 0, /* sq_concat */
1367 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001368 element_getitem,
1369 element_getslice,
1370 element_setitem,
1371 element_setslice,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001372};
1373
Neal Norwitz227b5332006-03-22 09:28:35 +00001374static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001375 PyVarObject_HEAD_INIT(NULL, 0)
1376 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001377 /* methods */
1378 (destructor)element_dealloc, /* tp_dealloc */
1379 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001380 0, /* tp_getattr */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001381 (setattrfunc)element_setattr, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00001382 0, /* tp_reserved */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001383 (reprfunc)element_repr, /* tp_repr */
1384 0, /* tp_as_number */
1385 &element_as_sequence, /* tp_as_sequence */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001386 0, /* tp_as_mapping */
1387 0, /* tp_hash */
1388 0, /* tp_call */
1389 0, /* tp_str */
1390 (getattrofunc)element_getattro, /* tp_getattro */
1391 0, /* tp_setattro */
1392 0, /* tp_as_buffer */
1393 Py_TPFLAGS_DEFAULT, /* tp_flags */
1394 0, /* tp_doc */
1395 0, /* tp_traverse */
1396 0, /* tp_clear */
1397 0, /* tp_richcompare */
1398 0, /* tp_weaklistoffset */
1399 0, /* tp_iter */
1400 0, /* tp_iternext */
1401 element_methods, /* tp_methods */
1402 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001403};
1404
1405/* ==================================================================== */
1406/* the tree builder type */
1407
1408typedef struct {
1409 PyObject_HEAD
1410
1411 PyObject* root; /* root node (first created node) */
1412
1413 ElementObject* this; /* current node */
1414 ElementObject* last; /* most recently created node */
1415
1416 PyObject* data; /* data collector (string or list), or NULL */
1417
1418 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001419 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001420
1421 /* element tracing */
1422 PyObject* events; /* list of events, or NULL if not collecting */
1423 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1424 PyObject* end_event_obj;
1425 PyObject* start_ns_event_obj;
1426 PyObject* end_ns_event_obj;
1427
1428} TreeBuilderObject;
1429
Neal Norwitz227b5332006-03-22 09:28:35 +00001430static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001431
Christian Heimes90aa7642007-12-19 02:45:37 +00001432#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001433
1434/* -------------------------------------------------------------------- */
1435/* constructor and destructor */
1436
1437LOCAL(PyObject*)
1438treebuilder_new(void)
1439{
1440 TreeBuilderObject* self;
1441
1442 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1443 if (self == NULL)
1444 return NULL;
1445
1446 self->root = NULL;
1447
1448 Py_INCREF(Py_None);
1449 self->this = (ElementObject*) Py_None;
1450
1451 Py_INCREF(Py_None);
1452 self->last = (ElementObject*) Py_None;
1453
1454 self->data = NULL;
1455
1456 self->stack = PyList_New(20);
1457 self->index = 0;
1458
1459 self->events = NULL;
1460 self->start_event_obj = self->end_event_obj = NULL;
1461 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1462
1463 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1464
1465 return (PyObject*) self;
1466}
1467
1468static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001469treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001470{
1471 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1472 return NULL;
1473
1474 return treebuilder_new();
1475}
1476
1477static void
1478treebuilder_dealloc(TreeBuilderObject* self)
1479{
1480 Py_XDECREF(self->end_ns_event_obj);
1481 Py_XDECREF(self->start_ns_event_obj);
1482 Py_XDECREF(self->end_event_obj);
1483 Py_XDECREF(self->start_event_obj);
1484 Py_XDECREF(self->events);
1485 Py_DECREF(self->stack);
1486 Py_XDECREF(self->data);
1487 Py_DECREF(self->last);
1488 Py_DECREF(self->this);
1489 Py_XDECREF(self->root);
1490
1491 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1492
1493 PyObject_Del(self);
1494}
1495
1496/* -------------------------------------------------------------------- */
1497/* handlers */
1498
1499LOCAL(PyObject*)
1500treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1501 PyObject* standalone)
1502{
1503 Py_RETURN_NONE;
1504}
1505
1506LOCAL(PyObject*)
1507treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1508 PyObject* attrib)
1509{
1510 PyObject* node;
1511 PyObject* this;
1512
1513 if (self->data) {
1514 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001515 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001516 self->last->text = JOIN_SET(
1517 self->data, PyList_CheckExact(self->data)
1518 );
1519 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001520 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001521 self->last->tail = JOIN_SET(
1522 self->data, PyList_CheckExact(self->data)
1523 );
1524 }
1525 self->data = NULL;
1526 }
1527
1528 node = element_new(tag, attrib);
1529 if (!node)
1530 return NULL;
1531
1532 this = (PyObject*) self->this;
1533
1534 if (this != Py_None) {
1535 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001536 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001537 } else {
1538 if (self->root) {
1539 PyErr_SetString(
1540 PyExc_SyntaxError,
1541 "multiple elements on top level"
1542 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001543 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001544 }
1545 Py_INCREF(node);
1546 self->root = node;
1547 }
1548
1549 if (self->index < PyList_GET_SIZE(self->stack)) {
1550 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001551 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001552 Py_INCREF(this);
1553 } else {
1554 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001555 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001556 }
1557 self->index++;
1558
1559 Py_DECREF(this);
1560 Py_INCREF(node);
1561 self->this = (ElementObject*) node;
1562
1563 Py_DECREF(self->last);
1564 Py_INCREF(node);
1565 self->last = (ElementObject*) node;
1566
1567 if (self->start_event_obj) {
1568 PyObject* res;
1569 PyObject* action = self->start_event_obj;
1570 res = PyTuple_New(2);
1571 if (res) {
1572 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1573 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1574 PyList_Append(self->events, res);
1575 Py_DECREF(res);
1576 } else
1577 PyErr_Clear(); /* FIXME: propagate error */
1578 }
1579
1580 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001581
1582 error:
1583 Py_DECREF(node);
1584 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001585}
1586
1587LOCAL(PyObject*)
1588treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1589{
1590 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001591 if (self->last == (ElementObject*) Py_None) {
1592 /* ignore calls to data before the first call to start */
1593 Py_RETURN_NONE;
1594 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001595 /* store the first item as is */
1596 Py_INCREF(data); self->data = data;
1597 } else {
1598 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001599 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1600 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001601 /* expat often generates single character data sections; handle
1602 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001603 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1604 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001605 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001606 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001607 } else if (PyList_CheckExact(self->data)) {
1608 if (PyList_Append(self->data, data) < 0)
1609 return NULL;
1610 } else {
1611 PyObject* list = PyList_New(2);
1612 if (!list)
1613 return NULL;
1614 PyList_SET_ITEM(list, 0, self->data);
1615 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1616 self->data = list;
1617 }
1618 }
1619
1620 Py_RETURN_NONE;
1621}
1622
1623LOCAL(PyObject*)
1624treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1625{
1626 PyObject* item;
1627
1628 if (self->data) {
1629 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001630 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001631 self->last->text = JOIN_SET(
1632 self->data, PyList_CheckExact(self->data)
1633 );
1634 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001635 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001636 self->last->tail = JOIN_SET(
1637 self->data, PyList_CheckExact(self->data)
1638 );
1639 }
1640 self->data = NULL;
1641 }
1642
1643 if (self->index == 0) {
1644 PyErr_SetString(
1645 PyExc_IndexError,
1646 "pop from empty stack"
1647 );
1648 return NULL;
1649 }
1650
1651 self->index--;
1652
1653 item = PyList_GET_ITEM(self->stack, self->index);
1654 Py_INCREF(item);
1655
1656 Py_DECREF(self->last);
1657
1658 self->last = (ElementObject*) self->this;
1659 self->this = (ElementObject*) item;
1660
1661 if (self->end_event_obj) {
1662 PyObject* res;
1663 PyObject* action = self->end_event_obj;
1664 PyObject* node = (PyObject*) self->last;
1665 res = PyTuple_New(2);
1666 if (res) {
1667 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1668 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1669 PyList_Append(self->events, res);
1670 Py_DECREF(res);
1671 } else
1672 PyErr_Clear(); /* FIXME: propagate error */
1673 }
1674
1675 Py_INCREF(self->last);
1676 return (PyObject*) self->last;
1677}
1678
1679LOCAL(void)
1680treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1681 const char* prefix, const char *uri)
1682{
1683 PyObject* res;
1684 PyObject* action;
1685 PyObject* parcel;
1686
1687 if (!self->events)
1688 return;
1689
1690 if (start) {
1691 if (!self->start_ns_event_obj)
1692 return;
1693 action = self->start_ns_event_obj;
1694 /* FIXME: prefix and uri use utf-8 encoding! */
1695 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1696 if (!parcel)
1697 return;
1698 Py_INCREF(action);
1699 } else {
1700 if (!self->end_ns_event_obj)
1701 return;
1702 action = self->end_ns_event_obj;
1703 Py_INCREF(action);
1704 parcel = Py_None;
1705 Py_INCREF(parcel);
1706 }
1707
1708 res = PyTuple_New(2);
1709
1710 if (res) {
1711 PyTuple_SET_ITEM(res, 0, action);
1712 PyTuple_SET_ITEM(res, 1, parcel);
1713 PyList_Append(self->events, res);
1714 Py_DECREF(res);
1715 } else
1716 PyErr_Clear(); /* FIXME: propagate error */
1717}
1718
1719/* -------------------------------------------------------------------- */
1720/* methods (in alphabetical order) */
1721
1722static PyObject*
1723treebuilder_data(TreeBuilderObject* self, PyObject* args)
1724{
1725 PyObject* data;
1726 if (!PyArg_ParseTuple(args, "O:data", &data))
1727 return NULL;
1728
1729 return treebuilder_handle_data(self, data);
1730}
1731
1732static PyObject*
1733treebuilder_end(TreeBuilderObject* self, PyObject* args)
1734{
1735 PyObject* tag;
1736 if (!PyArg_ParseTuple(args, "O:end", &tag))
1737 return NULL;
1738
1739 return treebuilder_handle_end(self, tag);
1740}
1741
1742LOCAL(PyObject*)
1743treebuilder_done(TreeBuilderObject* self)
1744{
1745 PyObject* res;
1746
1747 /* FIXME: check stack size? */
1748
1749 if (self->root)
1750 res = self->root;
1751 else
1752 res = Py_None;
1753
1754 Py_INCREF(res);
1755 return res;
1756}
1757
1758static PyObject*
1759treebuilder_close(TreeBuilderObject* self, PyObject* args)
1760{
1761 if (!PyArg_ParseTuple(args, ":close"))
1762 return NULL;
1763
1764 return treebuilder_done(self);
1765}
1766
1767static PyObject*
1768treebuilder_start(TreeBuilderObject* self, PyObject* args)
1769{
1770 PyObject* tag;
1771 PyObject* attrib = Py_None;
1772 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1773 return NULL;
1774
1775 return treebuilder_handle_start(self, tag, attrib);
1776}
1777
1778static PyObject*
1779treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1780{
1781 PyObject* encoding;
1782 PyObject* standalone;
1783 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1784 return NULL;
1785
1786 return treebuilder_handle_xml(self, encoding, standalone);
1787}
1788
1789static PyMethodDef treebuilder_methods[] = {
1790 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1791 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1792 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1793 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1794 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1795 {NULL, NULL}
1796};
1797
Neal Norwitz227b5332006-03-22 09:28:35 +00001798static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001799 PyVarObject_HEAD_INIT(NULL, 0)
1800 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001801 /* methods */
1802 (destructor)treebuilder_dealloc, /* tp_dealloc */
1803 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001804 0, /* tp_getattr */
1805 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00001806 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001807 0, /* tp_repr */
1808 0, /* tp_as_number */
1809 0, /* tp_as_sequence */
1810 0, /* tp_as_mapping */
1811 0, /* tp_hash */
1812 0, /* tp_call */
1813 0, /* tp_str */
1814 0, /* tp_getattro */
1815 0, /* tp_setattro */
1816 0, /* tp_as_buffer */
1817 Py_TPFLAGS_DEFAULT, /* tp_flags */
1818 0, /* tp_doc */
1819 0, /* tp_traverse */
1820 0, /* tp_clear */
1821 0, /* tp_richcompare */
1822 0, /* tp_weaklistoffset */
1823 0, /* tp_iter */
1824 0, /* tp_iternext */
1825 treebuilder_methods, /* tp_methods */
1826 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001827};
1828
1829/* ==================================================================== */
1830/* the expat interface */
1831
1832#if defined(USE_EXPAT)
1833
1834#include "expat.h"
1835
1836#if defined(USE_PYEXPAT_CAPI)
1837#include "pyexpat.h"
1838static struct PyExpat_CAPI* expat_capi;
1839#define EXPAT(func) (expat_capi->func)
1840#else
1841#define EXPAT(func) (XML_##func)
1842#endif
1843
1844typedef struct {
1845 PyObject_HEAD
1846
1847 XML_Parser parser;
1848
1849 PyObject* target;
1850 PyObject* entity;
1851
1852 PyObject* names;
1853
1854 PyObject* handle_xml;
1855 PyObject* handle_start;
1856 PyObject* handle_data;
1857 PyObject* handle_end;
1858
1859 PyObject* handle_comment;
1860 PyObject* handle_pi;
1861
1862} XMLParserObject;
1863
Neal Norwitz227b5332006-03-22 09:28:35 +00001864static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001865
1866/* helpers */
1867
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001868LOCAL(PyObject*)
1869makeuniversal(XMLParserObject* self, const char* string)
1870{
1871 /* convert a UTF-8 tag/attribute name from the expat parser
1872 to a universal name string */
1873
1874 int size = strlen(string);
1875 PyObject* key;
1876 PyObject* value;
1877
1878 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00001879 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001880 if (!key)
1881 return NULL;
1882
1883 value = PyDict_GetItem(self->names, key);
1884
1885 if (value) {
1886 Py_INCREF(value);
1887 } else {
1888 /* new name. convert to universal name, and decode as
1889 necessary */
1890
1891 PyObject* tag;
1892 char* p;
1893 int i;
1894
1895 /* look for namespace separator */
1896 for (i = 0; i < size; i++)
1897 if (string[i] == '}')
1898 break;
1899 if (i != size) {
1900 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00001901 tag = PyBytes_FromStringAndSize(NULL, size+1);
1902 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001903 p[0] = '{';
1904 memcpy(p+1, string, size);
1905 size++;
1906 } else {
1907 /* plain name; use key as tag */
1908 Py_INCREF(key);
1909 tag = key;
1910 }
1911
1912 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00001913 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00001914 value = PyUnicode_DecodeUTF8(p, size, "strict");
1915 Py_DECREF(tag);
1916 if (!value) {
1917 Py_DECREF(key);
1918 return NULL;
1919 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001920
1921 /* add to names dictionary */
1922 if (PyDict_SetItem(self->names, key, value) < 0) {
1923 Py_DECREF(key);
1924 Py_DECREF(value);
1925 return NULL;
1926 }
1927 }
1928
1929 Py_DECREF(key);
1930 return value;
1931}
1932
1933/* -------------------------------------------------------------------- */
1934/* handlers */
1935
1936static void
1937expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1938 int data_len)
1939{
1940 PyObject* key;
1941 PyObject* value;
1942 PyObject* res;
1943
1944 if (data_len < 2 || data_in[0] != '&')
1945 return;
1946
Neal Norwitz0269b912007-08-08 06:56:02 +00001947 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001948 if (!key)
1949 return;
1950
1951 value = PyDict_GetItem(self->entity, key);
1952
1953 if (value) {
1954 if (TreeBuilder_CheckExact(self->target))
1955 res = treebuilder_handle_data(
1956 (TreeBuilderObject*) self->target, value
1957 );
1958 else if (self->handle_data)
1959 res = PyObject_CallFunction(self->handle_data, "O", value);
1960 else
1961 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001962 Py_XDECREF(res);
1963 } else {
1964 PyErr_Format(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001965 PyExc_SyntaxError, "undefined entity &%s;: line %ld, column %ld",
Christian Heimes72b710a2008-05-26 13:28:38 +00001966 PyBytes_AS_STRING(key),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001967 EXPAT(GetErrorLineNumber)(self->parser),
1968 EXPAT(GetErrorColumnNumber)(self->parser)
1969 );
1970 }
1971
1972 Py_DECREF(key);
1973}
1974
1975static void
1976expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
1977 const XML_Char **attrib_in)
1978{
1979 PyObject* res;
1980 PyObject* tag;
1981 PyObject* attrib;
1982 int ok;
1983
1984 /* tag name */
1985 tag = makeuniversal(self, tag_in);
1986 if (!tag)
1987 return; /* parser will look for errors */
1988
1989 /* attributes */
1990 if (attrib_in[0]) {
1991 attrib = PyDict_New();
1992 if (!attrib)
1993 return;
1994 while (attrib_in[0] && attrib_in[1]) {
1995 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00001996 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001997 if (!key || !value) {
1998 Py_XDECREF(value);
1999 Py_XDECREF(key);
2000 Py_DECREF(attrib);
2001 return;
2002 }
2003 ok = PyDict_SetItem(attrib, key, value);
2004 Py_DECREF(value);
2005 Py_DECREF(key);
2006 if (ok < 0) {
2007 Py_DECREF(attrib);
2008 return;
2009 }
2010 attrib_in += 2;
2011 }
2012 } else {
2013 Py_INCREF(Py_None);
2014 attrib = Py_None;
2015 }
2016
2017 if (TreeBuilder_CheckExact(self->target))
2018 /* shortcut */
2019 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2020 tag, attrib);
2021 else if (self->handle_start)
2022 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2023 else
2024 res = NULL;
2025
2026 Py_DECREF(tag);
2027 Py_DECREF(attrib);
2028
2029 Py_XDECREF(res);
2030}
2031
2032static void
2033expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2034 int data_len)
2035{
2036 PyObject* data;
2037 PyObject* res;
2038
Neal Norwitz0269b912007-08-08 06:56:02 +00002039 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002040 if (!data)
2041 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002042
2043 if (TreeBuilder_CheckExact(self->target))
2044 /* shortcut */
2045 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2046 else if (self->handle_data)
2047 res = PyObject_CallFunction(self->handle_data, "O", data);
2048 else
2049 res = NULL;
2050
2051 Py_DECREF(data);
2052
2053 Py_XDECREF(res);
2054}
2055
2056static void
2057expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2058{
2059 PyObject* tag;
2060 PyObject* res = NULL;
2061
2062 if (TreeBuilder_CheckExact(self->target))
2063 /* shortcut */
2064 /* the standard tree builder doesn't look at the end tag */
2065 res = treebuilder_handle_end(
2066 (TreeBuilderObject*) self->target, Py_None
2067 );
2068 else if (self->handle_end) {
2069 tag = makeuniversal(self, tag_in);
2070 if (tag) {
2071 res = PyObject_CallFunction(self->handle_end, "O", tag);
2072 Py_DECREF(tag);
2073 }
2074 }
2075
2076 Py_XDECREF(res);
2077}
2078
2079static void
2080expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2081 const XML_Char *uri)
2082{
2083 treebuilder_handle_namespace(
2084 (TreeBuilderObject*) self->target, 1, prefix, uri
2085 );
2086}
2087
2088static void
2089expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2090{
2091 treebuilder_handle_namespace(
2092 (TreeBuilderObject*) self->target, 0, NULL, NULL
2093 );
2094}
2095
2096static void
2097expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2098{
2099 PyObject* comment;
2100 PyObject* res;
2101
2102 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002103 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002104 if (comment) {
2105 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2106 Py_XDECREF(res);
2107 Py_DECREF(comment);
2108 }
2109 }
2110}
2111
2112static void
2113expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2114 const XML_Char* data_in)
2115{
2116 PyObject* target;
2117 PyObject* data;
2118 PyObject* res;
2119
2120 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002121 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2122 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002123 if (target && data) {
2124 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2125 Py_XDECREF(res);
2126 Py_DECREF(data);
2127 Py_DECREF(target);
2128 } else {
2129 Py_XDECREF(data);
2130 Py_XDECREF(target);
2131 }
2132 }
2133}
2134
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002135static int
2136expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2137 XML_Encoding *info)
2138{
2139 PyObject* u;
2140 Py_UNICODE* p;
2141 unsigned char s[256];
2142 int i;
2143
2144 memset(info, 0, sizeof(XML_Encoding));
2145
2146 for (i = 0; i < 256; i++)
2147 s[i] = i;
2148
Fredrik Lundhc3389992005-12-25 11:40:19 +00002149 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002150 if (!u)
2151 return XML_STATUS_ERROR;
2152
2153 if (PyUnicode_GET_SIZE(u) != 256) {
2154 Py_DECREF(u);
2155 return XML_STATUS_ERROR;
2156 }
2157
2158 p = PyUnicode_AS_UNICODE(u);
2159
2160 for (i = 0; i < 256; i++) {
2161 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2162 info->map[i] = p[i];
2163 else
2164 info->map[i] = -1;
2165 }
2166
2167 Py_DECREF(u);
2168
2169 return XML_STATUS_OK;
2170}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002171
2172/* -------------------------------------------------------------------- */
2173/* constructor and destructor */
2174
2175static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002176xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002177{
2178 XMLParserObject* self;
2179 /* FIXME: does this need to be static? */
2180 static XML_Memory_Handling_Suite memory_handler;
2181
2182 PyObject* target = NULL;
2183 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002184 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002185 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2186 &target, &encoding))
2187 return NULL;
2188
2189#if defined(USE_PYEXPAT_CAPI)
2190 if (!expat_capi) {
2191 PyErr_SetString(
2192 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2193 );
2194 return NULL;
2195 }
2196#endif
2197
2198 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2199 if (self == NULL)
2200 return NULL;
2201
2202 self->entity = PyDict_New();
2203 if (!self->entity) {
2204 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002205 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002206 }
2207
2208 self->names = PyDict_New();
2209 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002210 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002211 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002212 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002213 }
2214
2215 memory_handler.malloc_fcn = PyObject_Malloc;
2216 memory_handler.realloc_fcn = PyObject_Realloc;
2217 memory_handler.free_fcn = PyObject_Free;
2218
2219 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2220 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002221 PyObject_Del(self->names);
2222 PyObject_Del(self->entity);
2223 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002224 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002225 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002226 }
2227
2228 /* setup target handlers */
2229 if (!target) {
2230 target = treebuilder_new();
2231 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002232 EXPAT(ParserFree)(self->parser);
2233 PyObject_Del(self->names);
2234 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002235 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002236 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002237 }
2238 } else
2239 Py_INCREF(target);
2240 self->target = target;
2241
2242 self->handle_xml = PyObject_GetAttrString(target, "xml");
2243 self->handle_start = PyObject_GetAttrString(target, "start");
2244 self->handle_data = PyObject_GetAttrString(target, "data");
2245 self->handle_end = PyObject_GetAttrString(target, "end");
2246 self->handle_comment = PyObject_GetAttrString(target, "comment");
2247 self->handle_pi = PyObject_GetAttrString(target, "pi");
2248
2249 PyErr_Clear();
2250
2251 /* configure parser */
2252 EXPAT(SetUserData)(self->parser, self);
2253 EXPAT(SetElementHandler)(
2254 self->parser,
2255 (XML_StartElementHandler) expat_start_handler,
2256 (XML_EndElementHandler) expat_end_handler
2257 );
2258 EXPAT(SetDefaultHandlerExpand)(
2259 self->parser,
2260 (XML_DefaultHandler) expat_default_handler
2261 );
2262 EXPAT(SetCharacterDataHandler)(
2263 self->parser,
2264 (XML_CharacterDataHandler) expat_data_handler
2265 );
2266 if (self->handle_comment)
2267 EXPAT(SetCommentHandler)(
2268 self->parser,
2269 (XML_CommentHandler) expat_comment_handler
2270 );
2271 if (self->handle_pi)
2272 EXPAT(SetProcessingInstructionHandler)(
2273 self->parser,
2274 (XML_ProcessingInstructionHandler) expat_pi_handler
2275 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002276 EXPAT(SetUnknownEncodingHandler)(
2277 self->parser,
2278 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2279 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002280
2281 ALLOC(sizeof(XMLParserObject), "create expatparser");
2282
2283 return (PyObject*) self;
2284}
2285
2286static void
2287xmlparser_dealloc(XMLParserObject* self)
2288{
2289 EXPAT(ParserFree)(self->parser);
2290
2291 Py_XDECREF(self->handle_pi);
2292 Py_XDECREF(self->handle_comment);
2293 Py_XDECREF(self->handle_end);
2294 Py_XDECREF(self->handle_data);
2295 Py_XDECREF(self->handle_start);
2296 Py_XDECREF(self->handle_xml);
2297
2298 Py_DECREF(self->target);
2299 Py_DECREF(self->entity);
2300 Py_DECREF(self->names);
2301
2302 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2303
2304 PyObject_Del(self);
2305}
2306
/* -------------------------------------------------------------------- */
/* methods (in alphabetical order) */
2309
2310LOCAL(PyObject*)
2311expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2312{
2313 int ok;
2314
2315 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2316
2317 if (PyErr_Occurred())
2318 return NULL;
2319
2320 if (!ok) {
2321 PyErr_Format(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002322 PyExc_SyntaxError, "%s: line %ld, column %ld",
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002323 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2324 EXPAT(GetErrorLineNumber)(self->parser),
2325 EXPAT(GetErrorColumnNumber)(self->parser)
2326 );
2327 return NULL;
2328 }
2329
2330 Py_RETURN_NONE;
2331}
2332
2333static PyObject*
2334xmlparser_close(XMLParserObject* self, PyObject* args)
2335{
2336 /* end feeding data to parser */
2337
2338 PyObject* res;
2339 if (!PyArg_ParseTuple(args, ":close"))
2340 return NULL;
2341
2342 res = expat_parse(self, "", 0, 1);
2343
2344 if (res && TreeBuilder_CheckExact(self->target)) {
2345 Py_DECREF(res);
2346 return treebuilder_done((TreeBuilderObject*) self->target);
2347 }
2348
2349 return res;
2350}
2351
2352static PyObject*
2353xmlparser_feed(XMLParserObject* self, PyObject* args)
2354{
2355 /* feed data to parser */
2356
2357 char* data;
2358 int data_len;
2359 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2360 return NULL;
2361
2362 return expat_parse(self, data, data_len, 0);
2363}
2364
2365static PyObject*
2366xmlparser_parse(XMLParserObject* self, PyObject* args)
2367{
2368 /* (internal) parse until end of input stream */
2369
2370 PyObject* reader;
2371 PyObject* buffer;
2372 PyObject* res;
2373
2374 PyObject* fileobj;
2375 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2376 return NULL;
2377
2378 reader = PyObject_GetAttrString(fileobj, "read");
2379 if (!reader)
2380 return NULL;
2381
2382 /* read from open file object */
2383 for (;;) {
2384
2385 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2386
2387 if (!buffer) {
2388 /* read failed (e.g. due to KeyboardInterrupt) */
2389 Py_DECREF(reader);
2390 return NULL;
2391 }
2392
Christian Heimes72b710a2008-05-26 13:28:38 +00002393 if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002394 Py_DECREF(buffer);
2395 break;
2396 }
2397
2398 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002399 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002400 );
2401
2402 Py_DECREF(buffer);
2403
2404 if (!res) {
2405 Py_DECREF(reader);
2406 return NULL;
2407 }
2408 Py_DECREF(res);
2409
2410 }
2411
2412 Py_DECREF(reader);
2413
2414 res = expat_parse(self, "", 0, 1);
2415
2416 if (res && TreeBuilder_CheckExact(self->target)) {
2417 Py_DECREF(res);
2418 return treebuilder_done((TreeBuilderObject*) self->target);
2419 }
2420
2421 return res;
2422}
2423
2424static PyObject*
2425xmlparser_setevents(XMLParserObject* self, PyObject* args)
2426{
2427 /* activate element event reporting */
2428
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002429 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002430 TreeBuilderObject* target;
2431
2432 PyObject* events; /* event collector */
2433 PyObject* event_set = Py_None;
2434 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2435 &event_set))
2436 return NULL;
2437
2438 if (!TreeBuilder_CheckExact(self->target)) {
2439 PyErr_SetString(
2440 PyExc_TypeError,
2441 "event handling only supported for cElementTree.Treebuilder "
2442 "targets"
2443 );
2444 return NULL;
2445 }
2446
2447 target = (TreeBuilderObject*) self->target;
2448
2449 Py_INCREF(events);
2450 Py_XDECREF(target->events);
2451 target->events = events;
2452
2453 /* clear out existing events */
2454 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2455 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2456 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2457 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2458
2459 if (event_set == Py_None) {
2460 /* default is "end" only */
Christian Heimes72b710a2008-05-26 13:28:38 +00002461 target->end_event_obj = PyBytes_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002462 Py_RETURN_NONE;
2463 }
2464
2465 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2466 goto error;
2467
2468 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2469 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2470 char* event;
Christian Heimes72b710a2008-05-26 13:28:38 +00002471 if (!PyBytes_Check(item))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002472 goto error;
Christian Heimes72b710a2008-05-26 13:28:38 +00002473 event = PyBytes_AS_STRING(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002474 if (strcmp(event, "start") == 0) {
2475 Py_INCREF(item);
2476 target->start_event_obj = item;
2477 } else if (strcmp(event, "end") == 0) {
2478 Py_INCREF(item);
2479 Py_XDECREF(target->end_event_obj);
2480 target->end_event_obj = item;
2481 } else if (strcmp(event, "start-ns") == 0) {
2482 Py_INCREF(item);
2483 Py_XDECREF(target->start_ns_event_obj);
2484 target->start_ns_event_obj = item;
2485 EXPAT(SetNamespaceDeclHandler)(
2486 self->parser,
2487 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2488 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2489 );
2490 } else if (strcmp(event, "end-ns") == 0) {
2491 Py_INCREF(item);
2492 Py_XDECREF(target->end_ns_event_obj);
2493 target->end_ns_event_obj = item;
2494 EXPAT(SetNamespaceDeclHandler)(
2495 self->parser,
2496 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2497 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2498 );
2499 } else {
2500 PyErr_Format(
2501 PyExc_ValueError,
2502 "unknown event '%s'", event
2503 );
2504 return NULL;
2505 }
2506 }
2507
2508 Py_RETURN_NONE;
2509
2510 error:
2511 PyErr_SetString(
2512 PyExc_TypeError,
2513 "invalid event tuple"
2514 );
2515 return NULL;
2516}
2517
2518static PyMethodDef xmlparser_methods[] = {
2519 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2520 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2521 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2522 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2523 {NULL, NULL}
2524};
2525
2526static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002527xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002528{
2529 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002530 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002531
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002532 if (PyUnicode_Check(nameobj))
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002533 name = _PyUnicode_AsString(nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002534
2535 PyErr_Clear();
2536
2537 if (strcmp(name, "entity") == 0)
2538 res = self->entity;
2539 else if (strcmp(name, "target") == 0)
2540 res = self->target;
2541 else if (strcmp(name, "version") == 0) {
2542 char buffer[100];
2543 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2544 XML_MINOR_VERSION, XML_MICRO_VERSION);
Christian Heimes72b710a2008-05-26 13:28:38 +00002545 return PyBytes_FromString(buffer);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002546 } else {
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002547 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002548 }
2549
2550 Py_INCREF(res);
2551 return res;
2552}
2553
Neal Norwitz227b5332006-03-22 09:28:35 +00002554static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002555 PyVarObject_HEAD_INIT(NULL, 0)
2556 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002557 /* methods */
2558 (destructor)xmlparser_dealloc, /* tp_dealloc */
2559 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002560 0, /* tp_getattr */
2561 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002562 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002563 0, /* tp_repr */
2564 0, /* tp_as_number */
2565 0, /* tp_as_sequence */
2566 0, /* tp_as_mapping */
2567 0, /* tp_hash */
2568 0, /* tp_call */
2569 0, /* tp_str */
2570 (getattrofunc)xmlparser_getattro, /* tp_getattro */
2571 0, /* tp_setattro */
2572 0, /* tp_as_buffer */
2573 Py_TPFLAGS_DEFAULT, /* tp_flags */
2574 0, /* tp_doc */
2575 0, /* tp_traverse */
2576 0, /* tp_clear */
2577 0, /* tp_richcompare */
2578 0, /* tp_weaklistoffset */
2579 0, /* tp_iter */
2580 0, /* tp_iternext */
2581 xmlparser_methods, /* tp_methods */
2582 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002583};
2584
2585#endif
2586
/* ==================================================================== */
/* python module interface */
2589
2590static PyMethodDef _functions[] = {
2591 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2592 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2593 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2594#if defined(USE_EXPAT)
2595 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2596 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2597#endif
2598 {NULL, NULL}
2599};
2600
Martin v. Löwis1a214512008-06-11 05:26:20 +00002601
2602static struct PyModuleDef _elementtreemodule = {
2603 PyModuleDef_HEAD_INIT,
2604 "_elementtree",
2605 NULL,
2606 -1,
2607 _functions,
2608 NULL,
2609 NULL,
2610 NULL,
2611 NULL
2612};
2613
Neal Norwitzf6657e62006-12-28 04:47:50 +00002614PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002615PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002616{
2617 PyObject* m;
2618 PyObject* g;
2619 char* bootstrap;
2620#if defined(USE_PYEXPAT_CAPI)
2621 struct PyExpat_CAPI* capi;
2622#endif
2623
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002624 /* Initialize object types */
2625 if (PyType_Ready(&TreeBuilder_Type) < 0)
2626 return NULL;
2627 if (PyType_Ready(&Element_Type) < 0)
2628 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002629#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002630 if (PyType_Ready(&XMLParser_Type) < 0)
2631 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002632#endif
2633
Martin v. Löwis1a214512008-06-11 05:26:20 +00002634 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002635 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002636 return NULL;
2637
2638 /* The code below requires that the module gets already added
2639 to sys.modules. */
2640 PyDict_SetItemString(PyImport_GetModuleDict(),
2641 _elementtreemodule.m_name,
2642 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002643
2644 /* python glue code */
2645
2646 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002647 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002648 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002649
2650 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2651
2652 bootstrap = (
2653
2654#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2655 "from __future__ import generators\n" /* enable yield under 2.2 */
2656#endif
2657
2658 "from copy import copy, deepcopy\n"
2659
2660 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002661 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002662 "except ImportError:\n"
2663 " import ElementTree\n"
2664 "ET = ElementTree\n"
2665 "del ElementTree\n"
2666
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002667 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002668
2669 "try:\n" /* check if copy works as is */
2670 " copy(cElementTree.Element('x'))\n"
2671 "except:\n"
2672 " def copyelement(elem):\n"
2673 " return elem\n"
2674
2675 "def Comment(text=None):\n" /* public */
2676 " element = cElementTree.Element(ET.Comment)\n"
2677 " element.text = text\n"
2678 " return element\n"
2679 "cElementTree.Comment = Comment\n"
2680
2681 "class ElementTree(ET.ElementTree):\n" /* public */
2682 " def parse(self, source, parser=None):\n"
2683 " if not hasattr(source, 'read'):\n"
2684 " source = open(source, 'rb')\n"
2685 " if parser is not None:\n"
2686 " while 1:\n"
2687 " data = source.read(65536)\n"
2688 " if not data:\n"
2689 " break\n"
2690 " parser.feed(data)\n"
2691 " self._root = parser.close()\n"
2692 " else:\n"
2693 " parser = cElementTree.XMLParser()\n"
2694 " self._root = parser._parse(source)\n"
2695 " return self._root\n"
2696 "cElementTree.ElementTree = ElementTree\n"
2697
2698 "def getiterator(node, tag=None):\n" /* helper */
2699 " if tag == '*':\n"
2700 " tag = None\n"
2701#if (PY_VERSION_HEX < 0x02020000)
2702 " nodes = []\n" /* 2.1 doesn't have yield */
2703 " if tag is None or node.tag == tag:\n"
2704 " nodes.append(node)\n"
2705 " for node in node:\n"
2706 " nodes.extend(getiterator(node, tag))\n"
2707 " return nodes\n"
2708#else
2709 " if tag is None or node.tag == tag:\n"
2710 " yield node\n"
2711 " for node in node:\n"
2712 " for node in getiterator(node, tag):\n"
2713 " yield node\n"
2714#endif
2715
2716 "def parse(source, parser=None):\n" /* public */
2717 " tree = ElementTree()\n"
2718 " tree.parse(source, parser)\n"
2719 " return tree\n"
2720 "cElementTree.parse = parse\n"
2721
2722#if (PY_VERSION_HEX < 0x02020000)
2723 "if hasattr(ET, 'iterparse'):\n"
2724 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2725#else
2726 "class iterparse(object):\n"
2727 " root = None\n"
2728 " def __init__(self, file, events=None):\n"
2729 " if not hasattr(file, 'read'):\n"
2730 " file = open(file, 'rb')\n"
2731 " self._file = file\n"
2732 " self._events = events\n"
2733 " def __iter__(self):\n"
2734 " events = []\n"
2735 " b = cElementTree.TreeBuilder()\n"
2736 " p = cElementTree.XMLParser(b)\n"
2737 " p._setevents(events, self._events)\n"
2738 " while 1:\n"
2739 " data = self._file.read(16384)\n"
2740 " if not data:\n"
2741 " break\n"
2742 " p.feed(data)\n"
2743 " for event in events:\n"
2744 " yield event\n"
2745 " del events[:]\n"
2746 " root = p.close()\n"
2747 " for event in events:\n"
2748 " yield event\n"
2749 " self.root = root\n"
2750 "cElementTree.iterparse = iterparse\n"
2751#endif
2752
2753 "def PI(target, text=None):\n" /* public */
2754 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2755 " element.text = target\n"
2756 " if text:\n"
2757 " element.text = element.text + ' ' + text\n"
2758 " return element\n"
2759
2760 " elem = cElementTree.Element(ET.PI)\n"
2761 " elem.text = text\n"
2762 " return elem\n"
2763 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2764
2765 "def XML(text):\n" /* public */
2766 " parser = cElementTree.XMLParser()\n"
2767 " parser.feed(text)\n"
2768 " return parser.close()\n"
2769 "cElementTree.XML = cElementTree.fromstring = XML\n"
2770
2771 "def XMLID(text):\n" /* public */
2772 " tree = XML(text)\n"
2773 " ids = {}\n"
2774 " for elem in tree.getiterator():\n"
2775 " id = elem.get('id')\n"
2776 " if id:\n"
2777 " ids[id] = elem\n"
2778 " return tree, ids\n"
2779 "cElementTree.XMLID = XMLID\n"
2780
2781 "cElementTree.dump = ET.dump\n"
2782 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2783 "cElementTree.iselement = ET.iselement\n"
2784 "cElementTree.QName = ET.QName\n"
2785 "cElementTree.tostring = ET.tostring\n"
2786 "cElementTree.VERSION = '" VERSION "'\n"
2787 "cElementTree.__version__ = '" VERSION "'\n"
2788 "cElementTree.XMLParserError = SyntaxError\n"
2789
2790 );
2791
2792 PyRun_String(bootstrap, Py_file_input, g, NULL);
2793
2794 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2795
2796 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2797 if (elementtree_copyelement_obj) {
2798 /* reduce hack needed; enable reduce method */
2799 PyMethodDef* mp;
2800 for (mp = element_methods; mp->ml_name; mp++)
2801 if (mp->ml_meth == (PyCFunction) element_reduce) {
2802 mp->ml_name = "__reduce__";
2803 break;
2804 }
2805 } else
2806 PyErr_Clear();
2807 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2808 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2809
2810#if defined(USE_PYEXPAT_CAPI)
2811 /* link against pyexpat, if possible */
Benjamin Petersonb173f782009-05-05 22:31:58 +00002812 capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002813 if (capi &&
2814 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2815 capi->size <= sizeof(*expat_capi) &&
2816 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2817 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2818 capi->MICRO_VERSION == XML_MICRO_VERSION)
2819 expat_capi = capi;
2820 else
2821 expat_capi = NULL;
2822#endif
Martin v. Löwis1a214512008-06-11 05:26:20 +00002823 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002824
2825}