blob: 2766b41758ebd98887621088fa1f89dc333b4ee3 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000036 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000038 *
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000039 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000041 *
42 * info@pythonware.com
43 * http://www.pythonware.com
44 */
45
Fredrik Lundh6d52b552005-12-16 22:06:43 +000046/* Licensed to PSF under a Contributor Agreement. */
47/* See http://www.python.org/2.4/license for licensing details. */
48
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000049#include "Python.h"
50
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000051#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
53/* -------------------------------------------------------------------- */
54/* configuration */
55
56/* Leave defined to include the expat-based XMLParser type */
57#define USE_EXPAT
58
/* Define to route all expat calls via pyexpat's embedded expat library */
60/* #define USE_PYEXPAT_CAPI */
61
62/* An element can hold this many children without extra memory
63 allocations. */
64#define STATIC_CHILDREN 4
65
/* For best performance, choose a value so that 80-90% of all nodes
67 have no more than the given number of children. Set this to zero
68 to minimize the size of the element structure itself (this only
69 helps if you have lots of leaf nodes with attributes). */
70
71/* Also note that pymalloc always allocates blocks in multiples of
72 eight bytes. For the current version of cElementTree, this means
73 that the number of children should be an even number, at least on
74 32-bit platforms. */
75
76/* -------------------------------------------------------------------- */
77
78#if 0
79static int memory = 0;
80#define ALLOC(size, comment)\
81do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
82#define RELEASE(size, comment)\
83do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
84#else
85#define ALLOC(size, comment)
86#define RELEASE(size, comment)
87#endif
88
89/* compiler tweaks */
90#if defined(_MSC_VER)
91#define LOCAL(type) static __inline type __fastcall
92#else
93#define LOCAL(type) static type
94#endif
95
96/* compatibility macros */
Martin v. Löwis18e16552006-02-15 17:27:45 +000097#if (PY_VERSION_HEX < 0x02050000)
98typedef int Py_ssize_t;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000099#define lenfunc inquiry
Martin v. Löwis18e16552006-02-15 17:27:45 +0000100#endif
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000101
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000102#if (PY_VERSION_HEX < 0x02040000)
103#define PyDict_CheckExact PyDict_Check
104#if (PY_VERSION_HEX < 0x02020000)
105#define PyList_CheckExact PyList_Check
106#define PyString_CheckExact PyString_Check
107#if (PY_VERSION_HEX >= 0x01060000)
108#define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
109#endif
110#endif
111#endif
112
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000113#if !defined(Py_RETURN_NONE)
114#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
115#endif
116
117/* macros used to store 'join' flags in string object pointers. note
118 that all use of text and tail as object pointers must be wrapped in
119 JOIN_OBJ. see comments in the ElementObject definition for more
120 info. */
121#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
122#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
123#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
124
125/* glue functions (see the init function for details) */
126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
128static PyObject* elementtree_getiterator_obj;
129static PyObject* elementpath_obj;
130
131/* helpers */
132
133LOCAL(PyObject*)
134deepcopy(PyObject* object, PyObject* memo)
135{
136 /* do a deep copy of the given object */
137
138 PyObject* args;
139 PyObject* result;
140
141 if (!elementtree_deepcopy_obj) {
142 PyErr_SetString(
143 PyExc_RuntimeError,
144 "deepcopy helper not found"
145 );
146 return NULL;
147 }
148
149 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000150 if (!args)
151 return NULL;
152
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000153 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
154 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
155
156 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
157
158 Py_DECREF(args);
159
160 return result;
161}
162
163LOCAL(PyObject*)
164list_join(PyObject* list)
165{
166 /* join list elements (destroying the list in the process) */
167
168 PyObject* joiner;
169 PyObject* function;
170 PyObject* args;
171 PyObject* result;
172
173 switch (PyList_GET_SIZE(list)) {
174 case 0:
175 Py_DECREF(list);
176 return PyString_FromString("");
177 case 1:
178 result = PyList_GET_ITEM(list, 0);
179 Py_INCREF(result);
180 Py_DECREF(list);
181 return result;
182 }
183
184 /* two or more elements: slice out a suitable separator from the
185 first member, and use that to join the entire list */
186
187 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
188 if (!joiner)
189 return NULL;
190
191 function = PyObject_GetAttrString(joiner, "join");
192 if (!function) {
193 Py_DECREF(joiner);
194 return NULL;
195 }
196
197 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000198 if (!args)
199 return NULL;
200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000201 PyTuple_SET_ITEM(args, 0, list);
202
203 result = PyObject_CallObject(function, args);
204
205 Py_DECREF(args); /* also removes list */
206 Py_DECREF(function);
207 Py_DECREF(joiner);
208
209 return result;
210}
211
212#if (PY_VERSION_HEX < 0x02020000)
213LOCAL(int)
214PyDict_Update(PyObject* dict, PyObject* other)
215{
216 /* PyDict_Update emulation for 2.1 and earlier */
217
218 PyObject* res;
219
220 res = PyObject_CallMethod(dict, "update", "O", other);
221 if (!res)
222 return -1;
223
224 Py_DECREF(res);
225 return 0;
226}
227#endif
228
229/* -------------------------------------------------------------------- */
230/* the element type */
231
232typedef struct {
233
234 /* attributes (a dictionary object), or None if no attributes */
235 PyObject* attrib;
236
237 /* child elements */
238 int length; /* actual number of items */
239 int allocated; /* allocated items */
240
241 /* this either points to _children or to a malloced buffer */
242 PyObject* *children;
243
244 PyObject* _children[STATIC_CHILDREN];
245
246} ElementObjectExtra;
247
248typedef struct {
249 PyObject_HEAD
250
251 /* element tag (a string). */
252 PyObject* tag;
253
254 /* text before first child. note that this is a tagged pointer;
255 use JOIN_OBJ to get the object pointer. the join flag is used
256 to distinguish lists created by the tree builder from lists
257 assigned to the attribute by application code; the former
258 should be joined before being returned to the user, the latter
259 should be left intact. */
260 PyObject* text;
261
262 /* text after this element, in parent. note that this is a tagged
263 pointer; use JOIN_OBJ to get the object pointer. */
264 PyObject* tail;
265
266 ElementObjectExtra* extra;
267
268} ElementObject;
269
Neal Norwitz227b5332006-03-22 09:28:35 +0000270static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271
272#define Element_CheckExact(op) ((op)->ob_type == &Element_Type)
273
274/* -------------------------------------------------------------------- */
275/* element constructor and destructor */
276
277LOCAL(int)
278element_new_extra(ElementObject* self, PyObject* attrib)
279{
280 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
281 if (!self->extra)
282 return -1;
283
284 if (!attrib)
285 attrib = Py_None;
286
287 Py_INCREF(attrib);
288 self->extra->attrib = attrib;
289
290 self->extra->length = 0;
291 self->extra->allocated = STATIC_CHILDREN;
292 self->extra->children = self->extra->_children;
293
294 return 0;
295}
296
297LOCAL(void)
298element_dealloc_extra(ElementObject* self)
299{
300 int i;
301
302 Py_DECREF(self->extra->attrib);
303
304 for (i = 0; i < self->extra->length; i++)
305 Py_DECREF(self->extra->children[i]);
306
307 if (self->extra->children != self->extra->_children)
308 PyObject_Free(self->extra->children);
309
310 PyObject_Free(self->extra);
311}
312
313LOCAL(PyObject*)
314element_new(PyObject* tag, PyObject* attrib)
315{
316 ElementObject* self;
317
318 self = PyObject_New(ElementObject, &Element_Type);
319 if (self == NULL)
320 return NULL;
321
322 /* use None for empty dictionaries */
323 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
324 attrib = Py_None;
325
326 self->extra = NULL;
327
328 if (attrib != Py_None) {
329
Thomas Wouters477c8d52006-05-27 19:21:47 +0000330 if (element_new_extra(self, attrib) < 0) {
331 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000332 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000333 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000334
335 self->extra->length = 0;
336 self->extra->allocated = STATIC_CHILDREN;
337 self->extra->children = self->extra->_children;
338
339 }
340
341 Py_INCREF(tag);
342 self->tag = tag;
343
344 Py_INCREF(Py_None);
345 self->text = Py_None;
346
347 Py_INCREF(Py_None);
348 self->tail = Py_None;
349
350 ALLOC(sizeof(ElementObject), "create element");
351
352 return (PyObject*) self;
353}
354
355LOCAL(int)
356element_resize(ElementObject* self, int extra)
357{
358 int size;
359 PyObject* *children;
360
361 /* make sure self->children can hold the given number of extra
362 elements. set an exception and return -1 if allocation failed */
363
364 if (!self->extra)
365 element_new_extra(self, NULL);
366
367 size = self->extra->length + extra;
368
369 if (size > self->extra->allocated) {
370 /* use Python 2.4's list growth strategy */
371 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
372 if (self->extra->children != self->extra->_children) {
373 children = PyObject_Realloc(self->extra->children,
374 size * sizeof(PyObject*));
375 if (!children)
376 goto nomemory;
377 } else {
378 children = PyObject_Malloc(size * sizeof(PyObject*));
379 if (!children)
380 goto nomemory;
381 /* copy existing children from static area to malloc buffer */
382 memcpy(children, self->extra->children,
383 self->extra->length * sizeof(PyObject*));
384 }
385 self->extra->children = children;
386 self->extra->allocated = size;
387 }
388
389 return 0;
390
391 nomemory:
392 PyErr_NoMemory();
393 return -1;
394}
395
396LOCAL(int)
397element_add_subelement(ElementObject* self, PyObject* element)
398{
399 /* add a child element to a parent */
400
401 if (element_resize(self, 1) < 0)
402 return -1;
403
404 Py_INCREF(element);
405 self->extra->children[self->extra->length] = element;
406
407 self->extra->length++;
408
409 return 0;
410}
411
412LOCAL(PyObject*)
413element_get_attrib(ElementObject* self)
414{
415 /* return borrowed reference to attrib dictionary */
416 /* note: this function assumes that the extra section exists */
417
418 PyObject* res = self->extra->attrib;
419
420 if (res == Py_None) {
421 /* create missing dictionary */
422 res = PyDict_New();
423 if (!res)
424 return NULL;
425 self->extra->attrib = res;
426 }
427
428 return res;
429}
430
431LOCAL(PyObject*)
432element_get_text(ElementObject* self)
433{
434 /* return borrowed reference to text attribute */
435
436 PyObject* res = self->text;
437
438 if (JOIN_GET(res)) {
439 res = JOIN_OBJ(res);
440 if (PyList_CheckExact(res)) {
441 res = list_join(res);
442 if (!res)
443 return NULL;
444 self->text = res;
445 }
446 }
447
448 return res;
449}
450
451LOCAL(PyObject*)
452element_get_tail(ElementObject* self)
453{
454 /* return borrowed reference to text attribute */
455
456 PyObject* res = self->tail;
457
458 if (JOIN_GET(res)) {
459 res = JOIN_OBJ(res);
460 if (PyList_CheckExact(res)) {
461 res = list_join(res);
462 if (!res)
463 return NULL;
464 self->tail = res;
465 }
466 }
467
468 return res;
469}
470
471static PyObject*
472element(PyObject* self, PyObject* args, PyObject* kw)
473{
474 PyObject* elem;
475
476 PyObject* tag;
477 PyObject* attrib = NULL;
478 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
479 &PyDict_Type, &attrib))
480 return NULL;
481
482 if (attrib || kw) {
483 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
484 if (!attrib)
485 return NULL;
486 if (kw)
487 PyDict_Update(attrib, kw);
488 } else {
489 Py_INCREF(Py_None);
490 attrib = Py_None;
491 }
492
493 elem = element_new(tag, attrib);
494
495 Py_DECREF(attrib);
496
497 return elem;
498}
499
500static PyObject*
501subelement(PyObject* self, PyObject* args, PyObject* kw)
502{
503 PyObject* elem;
504
505 ElementObject* parent;
506 PyObject* tag;
507 PyObject* attrib = NULL;
508 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
509 &Element_Type, &parent, &tag,
510 &PyDict_Type, &attrib))
511 return NULL;
512
513 if (attrib || kw) {
514 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
515 if (!attrib)
516 return NULL;
517 if (kw)
518 PyDict_Update(attrib, kw);
519 } else {
520 Py_INCREF(Py_None);
521 attrib = Py_None;
522 }
523
524 elem = element_new(tag, attrib);
525
526 Py_DECREF(attrib);
527
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000528 if (element_add_subelement(parent, elem) < 0) {
529 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000530 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000531 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000532
533 return elem;
534}
535
536static void
537element_dealloc(ElementObject* self)
538{
539 if (self->extra)
540 element_dealloc_extra(self);
541
542 /* discard attributes */
543 Py_DECREF(self->tag);
544 Py_DECREF(JOIN_OBJ(self->text));
545 Py_DECREF(JOIN_OBJ(self->tail));
546
547 RELEASE(sizeof(ElementObject), "destroy element");
548
549 PyObject_Del(self);
550}
551
552/* -------------------------------------------------------------------- */
553/* methods (in alphabetical order) */
554
555static PyObject*
556element_append(ElementObject* self, PyObject* args)
557{
558 PyObject* element;
559 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
560 return NULL;
561
562 if (element_add_subelement(self, element) < 0)
563 return NULL;
564
565 Py_RETURN_NONE;
566}
567
568static PyObject*
569element_clear(ElementObject* self, PyObject* args)
570{
571 if (!PyArg_ParseTuple(args, ":clear"))
572 return NULL;
573
574 if (self->extra) {
575 element_dealloc_extra(self);
576 self->extra = NULL;
577 }
578
579 Py_INCREF(Py_None);
580 Py_DECREF(JOIN_OBJ(self->text));
581 self->text = Py_None;
582
583 Py_INCREF(Py_None);
584 Py_DECREF(JOIN_OBJ(self->tail));
585 self->tail = Py_None;
586
587 Py_RETURN_NONE;
588}
589
590static PyObject*
591element_copy(ElementObject* self, PyObject* args)
592{
593 int i;
594 ElementObject* element;
595
596 if (!PyArg_ParseTuple(args, ":__copy__"))
597 return NULL;
598
599 element = (ElementObject*) element_new(
600 self->tag, (self->extra) ? self->extra->attrib : Py_None
601 );
602 if (!element)
603 return NULL;
604
605 Py_DECREF(JOIN_OBJ(element->text));
606 element->text = self->text;
607 Py_INCREF(JOIN_OBJ(element->text));
608
609 Py_DECREF(JOIN_OBJ(element->tail));
610 element->tail = self->tail;
611 Py_INCREF(JOIN_OBJ(element->tail));
612
613 if (self->extra) {
614
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000615 if (element_resize(element, self->extra->length) < 0) {
616 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000617 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000618 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000619
620 for (i = 0; i < self->extra->length; i++) {
621 Py_INCREF(self->extra->children[i]);
622 element->extra->children[i] = self->extra->children[i];
623 }
624
625 element->extra->length = self->extra->length;
626
627 }
628
629 return (PyObject*) element;
630}
631
632static PyObject*
633element_deepcopy(ElementObject* self, PyObject* args)
634{
635 int i;
636 ElementObject* element;
637 PyObject* tag;
638 PyObject* attrib;
639 PyObject* text;
640 PyObject* tail;
641 PyObject* id;
642
643 PyObject* memo;
644 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
645 return NULL;
646
647 tag = deepcopy(self->tag, memo);
648 if (!tag)
649 return NULL;
650
651 if (self->extra) {
652 attrib = deepcopy(self->extra->attrib, memo);
653 if (!attrib) {
654 Py_DECREF(tag);
655 return NULL;
656 }
657 } else {
658 Py_INCREF(Py_None);
659 attrib = Py_None;
660 }
661
662 element = (ElementObject*) element_new(tag, attrib);
663
664 Py_DECREF(tag);
665 Py_DECREF(attrib);
666
667 if (!element)
668 return NULL;
669
670 text = deepcopy(JOIN_OBJ(self->text), memo);
671 if (!text)
672 goto error;
673 Py_DECREF(element->text);
674 element->text = JOIN_SET(text, JOIN_GET(self->text));
675
676 tail = deepcopy(JOIN_OBJ(self->tail), memo);
677 if (!tail)
678 goto error;
679 Py_DECREF(element->tail);
680 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
681
682 if (self->extra) {
683
684 if (element_resize(element, self->extra->length) < 0)
685 goto error;
686
687 for (i = 0; i < self->extra->length; i++) {
688 PyObject* child = deepcopy(self->extra->children[i], memo);
689 if (!child) {
690 element->extra->length = i;
691 goto error;
692 }
693 element->extra->children[i] = child;
694 }
695
696 element->extra->length = self->extra->length;
697
698 }
699
700 /* add object to memo dictionary (so deepcopy won't visit it again) */
701 id = PyInt_FromLong((Py_uintptr_t) self);
702
703 i = PyDict_SetItem(memo, id, (PyObject*) element);
704
705 Py_DECREF(id);
706
707 if (i < 0)
708 goto error;
709
710 return (PyObject*) element;
711
712 error:
713 Py_DECREF(element);
714 return NULL;
715}
716
717LOCAL(int)
718checkpath(PyObject* tag)
719{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000720 Py_ssize_t i;
721 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722
723 /* check if a tag contains an xpath character */
724
725#define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
726
727#if defined(Py_USING_UNICODE)
728 if (PyUnicode_Check(tag)) {
729 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
730 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
731 if (p[i] == '{')
732 check = 0;
733 else if (p[i] == '}')
734 check = 1;
735 else if (check && PATHCHAR(p[i]))
736 return 1;
737 }
738 return 0;
739 }
740#endif
741 if (PyString_Check(tag)) {
742 char *p = PyString_AS_STRING(tag);
743 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
744 if (p[i] == '{')
745 check = 0;
746 else if (p[i] == '}')
747 check = 1;
748 else if (check && PATHCHAR(p[i]))
749 return 1;
750 }
751 return 0;
752 }
753
754 return 1; /* unknown type; might be path expression */
755}
756
757static PyObject*
758element_find(ElementObject* self, PyObject* args)
759{
760 int i;
761
762 PyObject* tag;
763 if (!PyArg_ParseTuple(args, "O:find", &tag))
764 return NULL;
765
766 if (checkpath(tag))
767 return PyObject_CallMethod(
768 elementpath_obj, "find", "OO", self, tag
769 );
770
771 if (!self->extra)
772 Py_RETURN_NONE;
773
774 for (i = 0; i < self->extra->length; i++) {
775 PyObject* item = self->extra->children[i];
776 if (Element_CheckExact(item) &&
777 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
778 Py_INCREF(item);
779 return item;
780 }
781 }
782
783 Py_RETURN_NONE;
784}
785
786static PyObject*
787element_findtext(ElementObject* self, PyObject* args)
788{
789 int i;
790
791 PyObject* tag;
792 PyObject* default_value = Py_None;
793 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
794 return NULL;
795
796 if (checkpath(tag))
797 return PyObject_CallMethod(
798 elementpath_obj, "findtext", "OOO", self, tag, default_value
799 );
800
801 if (!self->extra) {
802 Py_INCREF(default_value);
803 return default_value;
804 }
805
806 for (i = 0; i < self->extra->length; i++) {
807 ElementObject* item = (ElementObject*) self->extra->children[i];
808 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
809 PyObject* text = element_get_text(item);
810 if (text == Py_None)
811 return PyString_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000812 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000813 return text;
814 }
815 }
816
817 Py_INCREF(default_value);
818 return default_value;
819}
820
821static PyObject*
822element_findall(ElementObject* self, PyObject* args)
823{
824 int i;
825 PyObject* out;
826
827 PyObject* tag;
828 if (!PyArg_ParseTuple(args, "O:findall", &tag))
829 return NULL;
830
831 if (checkpath(tag))
832 return PyObject_CallMethod(
833 elementpath_obj, "findall", "OO", self, tag
834 );
835
836 out = PyList_New(0);
837 if (!out)
838 return NULL;
839
840 if (!self->extra)
841 return out;
842
843 for (i = 0; i < self->extra->length; i++) {
844 PyObject* item = self->extra->children[i];
845 if (Element_CheckExact(item) &&
846 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
847 if (PyList_Append(out, item) < 0) {
848 Py_DECREF(out);
849 return NULL;
850 }
851 }
852 }
853
854 return out;
855}
856
857static PyObject*
858element_get(ElementObject* self, PyObject* args)
859{
860 PyObject* value;
861
862 PyObject* key;
863 PyObject* default_value = Py_None;
864 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
865 return NULL;
866
867 if (!self->extra || self->extra->attrib == Py_None)
868 value = default_value;
869 else {
870 value = PyDict_GetItem(self->extra->attrib, key);
871 if (!value)
872 value = default_value;
873 }
874
875 Py_INCREF(value);
876 return value;
877}
878
879static PyObject*
880element_getchildren(ElementObject* self, PyObject* args)
881{
882 int i;
883 PyObject* list;
884
885 if (!PyArg_ParseTuple(args, ":getchildren"))
886 return NULL;
887
888 if (!self->extra)
889 return PyList_New(0);
890
891 list = PyList_New(self->extra->length);
892 if (!list)
893 return NULL;
894
895 for (i = 0; i < self->extra->length; i++) {
896 PyObject* item = self->extra->children[i];
897 Py_INCREF(item);
898 PyList_SET_ITEM(list, i, item);
899 }
900
901 return list;
902}
903
904static PyObject*
905element_getiterator(ElementObject* self, PyObject* args)
906{
907 PyObject* result;
908
909 PyObject* tag = Py_None;
910 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
911 return NULL;
912
913 if (!elementtree_getiterator_obj) {
914 PyErr_SetString(
915 PyExc_RuntimeError,
916 "getiterator helper not found"
917 );
918 return NULL;
919 }
920
921 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000922 if (!args)
923 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000924
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000925 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
926 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
927
928 result = PyObject_CallObject(elementtree_getiterator_obj, args);
929
930 Py_DECREF(args);
931
932 return result;
933}
934
935static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000936element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000937{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000938 ElementObject* self = (ElementObject*) self_;
939
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000940 if (!self->extra || index < 0 || index >= self->extra->length) {
941 PyErr_SetString(
942 PyExc_IndexError,
943 "child index out of range"
944 );
945 return NULL;
946 }
947
948 Py_INCREF(self->extra->children[index]);
949 return self->extra->children[index];
950}
951
952static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000953element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000954{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000955 ElementObject* self = (ElementObject*) self_;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000956 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000957 PyObject* list;
958
959 if (!self->extra)
960 return PyList_New(0);
961
962 /* standard clamping */
963 if (start < 0)
964 start = 0;
965 if (end < 0)
966 end = 0;
967 if (end > self->extra->length)
968 end = self->extra->length;
969 if (start > end)
970 start = end;
971
972 list = PyList_New(end - start);
973 if (!list)
974 return NULL;
975
976 for (i = start; i < end; i++) {
977 PyObject* item = self->extra->children[i];
978 Py_INCREF(item);
979 PyList_SET_ITEM(list, i - start, item);
980 }
981
982 return list;
983}
984
985static PyObject*
986element_insert(ElementObject* self, PyObject* args)
987{
988 int i;
989
990 int index;
991 PyObject* element;
992 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
993 &Element_Type, &element))
994 return NULL;
995
996 if (!self->extra)
997 element_new_extra(self, NULL);
998
999 if (index < 0)
1000 index = 0;
1001 if (index > self->extra->length)
1002 index = self->extra->length;
1003
1004 if (element_resize(self, 1) < 0)
1005 return NULL;
1006
1007 for (i = self->extra->length; i > index; i--)
1008 self->extra->children[i] = self->extra->children[i-1];
1009
1010 Py_INCREF(element);
1011 self->extra->children[index] = element;
1012
1013 self->extra->length++;
1014
1015 Py_RETURN_NONE;
1016}
1017
1018static PyObject*
1019element_items(ElementObject* self, PyObject* args)
1020{
1021 if (!PyArg_ParseTuple(args, ":items"))
1022 return NULL;
1023
1024 if (!self->extra || self->extra->attrib == Py_None)
1025 return PyList_New(0);
1026
1027 return PyDict_Items(self->extra->attrib);
1028}
1029
1030static PyObject*
1031element_keys(ElementObject* self, PyObject* args)
1032{
1033 if (!PyArg_ParseTuple(args, ":keys"))
1034 return NULL;
1035
1036 if (!self->extra || self->extra->attrib == Py_None)
1037 return PyList_New(0);
1038
1039 return PyDict_Keys(self->extra->attrib);
1040}
1041
Martin v. Löwis18e16552006-02-15 17:27:45 +00001042static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001043element_length(ElementObject* self)
1044{
1045 if (!self->extra)
1046 return 0;
1047
1048 return self->extra->length;
1049}
1050
1051static PyObject*
1052element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1053{
1054 PyObject* elem;
1055
1056 PyObject* tag;
1057 PyObject* attrib;
1058 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1059 return NULL;
1060
1061 attrib = PyDict_Copy(attrib);
1062 if (!attrib)
1063 return NULL;
1064
1065 elem = element_new(tag, attrib);
1066
1067 Py_DECREF(attrib);
1068
1069 return elem;
1070}
1071
1072static PyObject*
1073element_reduce(ElementObject* self, PyObject* args)
1074{
1075 if (!PyArg_ParseTuple(args, ":__reduce__"))
1076 return NULL;
1077
1078 /* Hack alert: This method is used to work around a __copy__
1079 problem on certain 2.3 and 2.4 versions. To save time and
1080 simplify the code, we create the copy in here, and use a dummy
1081 copyelement helper to trick the copy module into doing the
1082 right thing. */
1083
1084 if (!elementtree_copyelement_obj) {
1085 PyErr_SetString(
1086 PyExc_RuntimeError,
1087 "copyelement helper not found"
1088 );
1089 return NULL;
1090 }
1091
1092 return Py_BuildValue(
1093 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1094 );
1095}
1096
1097static PyObject*
1098element_remove(ElementObject* self, PyObject* args)
1099{
1100 int i;
1101
1102 PyObject* element;
1103 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1104 return NULL;
1105
1106 if (!self->extra) {
1107 /* element has no children, so raise exception */
1108 PyErr_SetString(
1109 PyExc_ValueError,
1110 "list.remove(x): x not in list"
1111 );
1112 return NULL;
1113 }
1114
1115 for (i = 0; i < self->extra->length; i++) {
1116 if (self->extra->children[i] == element)
1117 break;
1118 if (PyObject_Compare(self->extra->children[i], element) == 0)
1119 break;
1120 }
1121
1122 if (i == self->extra->length) {
1123 /* element is not in children, so raise exception */
1124 PyErr_SetString(
1125 PyExc_ValueError,
1126 "list.remove(x): x not in list"
1127 );
1128 return NULL;
1129 }
1130
1131 Py_DECREF(self->extra->children[i]);
1132
1133 self->extra->length--;
1134
1135 for (; i < self->extra->length; i++)
1136 self->extra->children[i] = self->extra->children[i+1];
1137
1138 Py_RETURN_NONE;
1139}
1140
1141static PyObject*
1142element_repr(ElementObject* self)
1143{
1144 PyObject* repr;
1145 char buffer[100];
1146
1147 repr = PyString_FromString("<Element ");
1148
1149 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag));
1150
1151 sprintf(buffer, " at %p>", self);
1152 PyString_ConcatAndDel(&repr, PyString_FromString(buffer));
1153
1154 return repr;
1155}
1156
1157static PyObject*
1158element_set(ElementObject* self, PyObject* args)
1159{
1160 PyObject* attrib;
1161
1162 PyObject* key;
1163 PyObject* value;
1164 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1165 return NULL;
1166
1167 if (!self->extra)
1168 element_new_extra(self, NULL);
1169
1170 attrib = element_get_attrib(self);
1171 if (!attrib)
1172 return NULL;
1173
1174 if (PyDict_SetItem(attrib, key, value) < 0)
1175 return NULL;
1176
1177 Py_RETURN_NONE;
1178}
1179
1180static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001181element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001183 ElementObject* self = (ElementObject*) self_;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001184 Py_ssize_t i, new, old;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001185 PyObject* recycle = NULL;
1186
1187 if (!self->extra)
1188 element_new_extra(self, NULL);
1189
1190 /* standard clamping */
1191 if (start < 0)
1192 start = 0;
1193 if (end < 0)
1194 end = 0;
1195 if (end > self->extra->length)
1196 end = self->extra->length;
1197 if (start > end)
1198 start = end;
1199
1200 old = end - start;
1201
1202 if (item == NULL)
1203 new = 0;
1204 else if (PyList_CheckExact(item)) {
1205 new = PyList_GET_SIZE(item);
1206 } else {
1207 /* FIXME: support arbitrary sequences? */
1208 PyErr_Format(
1209 PyExc_TypeError,
1210 "expected list, not \"%.200s\"", item->ob_type->tp_name
1211 );
1212 return -1;
1213 }
1214
1215 if (old > 0) {
1216 /* to avoid recursive calls to this method (via decref), move
1217 old items to the recycle bin here, and get rid of them when
1218 we're done modifying the element */
1219 recycle = PyList_New(old);
1220 for (i = 0; i < old; i++)
1221 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1222 }
1223
1224 if (new < old) {
1225 /* delete slice */
1226 for (i = end; i < self->extra->length; i++)
1227 self->extra->children[i + new - old] = self->extra->children[i];
1228 } else if (new > old) {
1229 /* insert slice */
1230 if (element_resize(self, new - old) < 0)
1231 return -1;
1232 for (i = self->extra->length-1; i >= end; i--)
1233 self->extra->children[i + new - old] = self->extra->children[i];
1234 }
1235
1236 /* replace the slice */
1237 for (i = 0; i < new; i++) {
1238 PyObject* element = PyList_GET_ITEM(item, i);
1239 Py_INCREF(element);
1240 self->extra->children[i + start] = element;
1241 }
1242
1243 self->extra->length += new - old;
1244
1245 /* discard the recycle bin, and everything in it */
1246 Py_XDECREF(recycle);
1247
1248 return 0;
1249}
1250
1251static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001252element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001253{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001254 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001255 int i;
1256 PyObject* old;
1257
1258 if (!self->extra || index < 0 || index >= self->extra->length) {
1259 PyErr_SetString(
1260 PyExc_IndexError,
1261 "child assignment index out of range");
1262 return -1;
1263 }
1264
1265 old = self->extra->children[index];
1266
1267 if (item) {
1268 Py_INCREF(item);
1269 self->extra->children[index] = item;
1270 } else {
1271 self->extra->length--;
1272 for (i = index; i < self->extra->length; i++)
1273 self->extra->children[i] = self->extra->children[i+1];
1274 }
1275
1276 Py_DECREF(old);
1277
1278 return 0;
1279}
1280
1281static PyMethodDef element_methods[] = {
1282
1283 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1284
1285 {"get", (PyCFunction) element_get, METH_VARARGS},
1286 {"set", (PyCFunction) element_set, METH_VARARGS},
1287
1288 {"find", (PyCFunction) element_find, METH_VARARGS},
1289 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1290 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1291
1292 {"append", (PyCFunction) element_append, METH_VARARGS},
1293 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1294 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1295
1296 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1297 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1298
1299 {"items", (PyCFunction) element_items, METH_VARARGS},
1300 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1301
1302 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1303
1304 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1305 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1306
1307 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1308 C objects correctly, so we have to fake it using a __reduce__-
1309 based hack (see the element_reduce implementation above for
1310 details). */
1311
1312 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1313 using a runtime test to figure out if we need to fake things
1314 or now (see the init code below). The following entry is
1315 enabled only if the hack is needed. */
1316
1317 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1318
1319 {NULL, NULL}
1320};
1321
1322static PyObject*
1323element_getattr(ElementObject* self, char* name)
1324{
1325 PyObject* res;
1326
1327 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1328 if (res)
1329 return res;
1330
1331 PyErr_Clear();
1332
1333 if (strcmp(name, "tag") == 0)
1334 res = self->tag;
1335 else if (strcmp(name, "text") == 0)
1336 res = element_get_text(self);
1337 else if (strcmp(name, "tail") == 0) {
1338 res = element_get_tail(self);
1339 } else if (strcmp(name, "attrib") == 0) {
1340 if (!self->extra)
1341 element_new_extra(self, NULL);
1342 res = element_get_attrib(self);
1343 } else {
1344 PyErr_SetString(PyExc_AttributeError, name);
1345 return NULL;
1346 }
1347
1348 if (!res)
1349 return NULL;
1350
1351 Py_INCREF(res);
1352 return res;
1353}
1354
1355static int
1356element_setattr(ElementObject* self, const char* name, PyObject* value)
1357{
1358 if (value == NULL) {
1359 PyErr_SetString(
1360 PyExc_AttributeError,
1361 "can't delete element attributes"
1362 );
1363 return -1;
1364 }
1365
1366 if (strcmp(name, "tag") == 0) {
1367 Py_DECREF(self->tag);
1368 self->tag = value;
1369 Py_INCREF(self->tag);
1370 } else if (strcmp(name, "text") == 0) {
1371 Py_DECREF(JOIN_OBJ(self->text));
1372 self->text = value;
1373 Py_INCREF(self->text);
1374 } else if (strcmp(name, "tail") == 0) {
1375 Py_DECREF(JOIN_OBJ(self->tail));
1376 self->tail = value;
1377 Py_INCREF(self->tail);
1378 } else if (strcmp(name, "attrib") == 0) {
1379 if (!self->extra)
1380 element_new_extra(self, NULL);
1381 Py_DECREF(self->extra->attrib);
1382 self->extra->attrib = value;
1383 Py_INCREF(self->extra->attrib);
1384 } else {
1385 PyErr_SetString(PyExc_AttributeError, name);
1386 return -1;
1387 }
1388
1389 return 0;
1390}
1391
1392static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001393 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001394 0, /* sq_concat */
1395 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001396 element_getitem,
1397 element_getslice,
1398 element_setitem,
1399 element_setslice,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001400};
1401
Neal Norwitz227b5332006-03-22 09:28:35 +00001402static PyTypeObject Element_Type = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001403 PyObject_HEAD_INIT(NULL)
1404 0, "Element", sizeof(ElementObject), 0,
1405 /* methods */
1406 (destructor)element_dealloc, /* tp_dealloc */
1407 0, /* tp_print */
1408 (getattrfunc)element_getattr, /* tp_getattr */
1409 (setattrfunc)element_setattr, /* tp_setattr */
1410 0, /* tp_compare */
1411 (reprfunc)element_repr, /* tp_repr */
1412 0, /* tp_as_number */
1413 &element_as_sequence, /* tp_as_sequence */
1414};
1415
1416/* ==================================================================== */
1417/* the tree builder type */
1418
1419typedef struct {
1420 PyObject_HEAD
1421
1422 PyObject* root; /* root node (first created node) */
1423
1424 ElementObject* this; /* current node */
1425 ElementObject* last; /* most recently created node */
1426
1427 PyObject* data; /* data collector (string or list), or NULL */
1428
1429 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001430 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001431
1432 /* element tracing */
1433 PyObject* events; /* list of events, or NULL if not collecting */
1434 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1435 PyObject* end_event_obj;
1436 PyObject* start_ns_event_obj;
1437 PyObject* end_ns_event_obj;
1438
1439} TreeBuilderObject;
1440
Neal Norwitz227b5332006-03-22 09:28:35 +00001441static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001442
1443#define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type)
1444
1445/* -------------------------------------------------------------------- */
1446/* constructor and destructor */
1447
1448LOCAL(PyObject*)
1449treebuilder_new(void)
1450{
1451 TreeBuilderObject* self;
1452
1453 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1454 if (self == NULL)
1455 return NULL;
1456
1457 self->root = NULL;
1458
1459 Py_INCREF(Py_None);
1460 self->this = (ElementObject*) Py_None;
1461
1462 Py_INCREF(Py_None);
1463 self->last = (ElementObject*) Py_None;
1464
1465 self->data = NULL;
1466
1467 self->stack = PyList_New(20);
1468 self->index = 0;
1469
1470 self->events = NULL;
1471 self->start_event_obj = self->end_event_obj = NULL;
1472 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1473
1474 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1475
1476 return (PyObject*) self;
1477}
1478
1479static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001480treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001481{
1482 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1483 return NULL;
1484
1485 return treebuilder_new();
1486}
1487
1488static void
1489treebuilder_dealloc(TreeBuilderObject* self)
1490{
1491 Py_XDECREF(self->end_ns_event_obj);
1492 Py_XDECREF(self->start_ns_event_obj);
1493 Py_XDECREF(self->end_event_obj);
1494 Py_XDECREF(self->start_event_obj);
1495 Py_XDECREF(self->events);
1496 Py_DECREF(self->stack);
1497 Py_XDECREF(self->data);
1498 Py_DECREF(self->last);
1499 Py_DECREF(self->this);
1500 Py_XDECREF(self->root);
1501
1502 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1503
1504 PyObject_Del(self);
1505}
1506
1507/* -------------------------------------------------------------------- */
1508/* handlers */
1509
1510LOCAL(PyObject*)
1511treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1512 PyObject* standalone)
1513{
1514 Py_RETURN_NONE;
1515}
1516
1517LOCAL(PyObject*)
1518treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1519 PyObject* attrib)
1520{
1521 PyObject* node;
1522 PyObject* this;
1523
1524 if (self->data) {
1525 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001526 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001527 self->last->text = JOIN_SET(
1528 self->data, PyList_CheckExact(self->data)
1529 );
1530 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001531 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001532 self->last->tail = JOIN_SET(
1533 self->data, PyList_CheckExact(self->data)
1534 );
1535 }
1536 self->data = NULL;
1537 }
1538
1539 node = element_new(tag, attrib);
1540 if (!node)
1541 return NULL;
1542
1543 this = (PyObject*) self->this;
1544
1545 if (this != Py_None) {
1546 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001547 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001548 } else {
1549 if (self->root) {
1550 PyErr_SetString(
1551 PyExc_SyntaxError,
1552 "multiple elements on top level"
1553 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001554 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001555 }
1556 Py_INCREF(node);
1557 self->root = node;
1558 }
1559
1560 if (self->index < PyList_GET_SIZE(self->stack)) {
1561 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001562 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001563 Py_INCREF(this);
1564 } else {
1565 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001566 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001567 }
1568 self->index++;
1569
1570 Py_DECREF(this);
1571 Py_INCREF(node);
1572 self->this = (ElementObject*) node;
1573
1574 Py_DECREF(self->last);
1575 Py_INCREF(node);
1576 self->last = (ElementObject*) node;
1577
1578 if (self->start_event_obj) {
1579 PyObject* res;
1580 PyObject* action = self->start_event_obj;
1581 res = PyTuple_New(2);
1582 if (res) {
1583 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1584 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1585 PyList_Append(self->events, res);
1586 Py_DECREF(res);
1587 } else
1588 PyErr_Clear(); /* FIXME: propagate error */
1589 }
1590
1591 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001592
1593 error:
1594 Py_DECREF(node);
1595 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001596}
1597
1598LOCAL(PyObject*)
1599treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1600{
1601 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001602 if (self->last == (ElementObject*) Py_None) {
1603 /* ignore calls to data before the first call to start */
1604 Py_RETURN_NONE;
1605 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001606 /* store the first item as is */
1607 Py_INCREF(data); self->data = data;
1608 } else {
1609 /* more than one item; use a list to collect items */
1610 if (PyString_CheckExact(self->data) && self->data->ob_refcnt == 1 &&
1611 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1612 /* expat often generates single character data sections; handle
1613 the most common case by resizing the existing string... */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001614 Py_ssize_t size = PyString_GET_SIZE(self->data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001615 if (_PyString_Resize(&self->data, size + 1) < 0)
1616 return NULL;
1617 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1618 } else if (PyList_CheckExact(self->data)) {
1619 if (PyList_Append(self->data, data) < 0)
1620 return NULL;
1621 } else {
1622 PyObject* list = PyList_New(2);
1623 if (!list)
1624 return NULL;
1625 PyList_SET_ITEM(list, 0, self->data);
1626 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1627 self->data = list;
1628 }
1629 }
1630
1631 Py_RETURN_NONE;
1632}
1633
1634LOCAL(PyObject*)
1635treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1636{
1637 PyObject* item;
1638
1639 if (self->data) {
1640 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001641 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001642 self->last->text = JOIN_SET(
1643 self->data, PyList_CheckExact(self->data)
1644 );
1645 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001646 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001647 self->last->tail = JOIN_SET(
1648 self->data, PyList_CheckExact(self->data)
1649 );
1650 }
1651 self->data = NULL;
1652 }
1653
1654 if (self->index == 0) {
1655 PyErr_SetString(
1656 PyExc_IndexError,
1657 "pop from empty stack"
1658 );
1659 return NULL;
1660 }
1661
1662 self->index--;
1663
1664 item = PyList_GET_ITEM(self->stack, self->index);
1665 Py_INCREF(item);
1666
1667 Py_DECREF(self->last);
1668
1669 self->last = (ElementObject*) self->this;
1670 self->this = (ElementObject*) item;
1671
1672 if (self->end_event_obj) {
1673 PyObject* res;
1674 PyObject* action = self->end_event_obj;
1675 PyObject* node = (PyObject*) self->last;
1676 res = PyTuple_New(2);
1677 if (res) {
1678 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1679 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1680 PyList_Append(self->events, res);
1681 Py_DECREF(res);
1682 } else
1683 PyErr_Clear(); /* FIXME: propagate error */
1684 }
1685
1686 Py_INCREF(self->last);
1687 return (PyObject*) self->last;
1688}
1689
1690LOCAL(void)
1691treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1692 const char* prefix, const char *uri)
1693{
1694 PyObject* res;
1695 PyObject* action;
1696 PyObject* parcel;
1697
1698 if (!self->events)
1699 return;
1700
1701 if (start) {
1702 if (!self->start_ns_event_obj)
1703 return;
1704 action = self->start_ns_event_obj;
1705 /* FIXME: prefix and uri use utf-8 encoding! */
1706 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1707 if (!parcel)
1708 return;
1709 Py_INCREF(action);
1710 } else {
1711 if (!self->end_ns_event_obj)
1712 return;
1713 action = self->end_ns_event_obj;
1714 Py_INCREF(action);
1715 parcel = Py_None;
1716 Py_INCREF(parcel);
1717 }
1718
1719 res = PyTuple_New(2);
1720
1721 if (res) {
1722 PyTuple_SET_ITEM(res, 0, action);
1723 PyTuple_SET_ITEM(res, 1, parcel);
1724 PyList_Append(self->events, res);
1725 Py_DECREF(res);
1726 } else
1727 PyErr_Clear(); /* FIXME: propagate error */
1728}
1729
1730/* -------------------------------------------------------------------- */
1731/* methods (in alphabetical order) */
1732
1733static PyObject*
1734treebuilder_data(TreeBuilderObject* self, PyObject* args)
1735{
1736 PyObject* data;
1737 if (!PyArg_ParseTuple(args, "O:data", &data))
1738 return NULL;
1739
1740 return treebuilder_handle_data(self, data);
1741}
1742
1743static PyObject*
1744treebuilder_end(TreeBuilderObject* self, PyObject* args)
1745{
1746 PyObject* tag;
1747 if (!PyArg_ParseTuple(args, "O:end", &tag))
1748 return NULL;
1749
1750 return treebuilder_handle_end(self, tag);
1751}
1752
1753LOCAL(PyObject*)
1754treebuilder_done(TreeBuilderObject* self)
1755{
1756 PyObject* res;
1757
1758 /* FIXME: check stack size? */
1759
1760 if (self->root)
1761 res = self->root;
1762 else
1763 res = Py_None;
1764
1765 Py_INCREF(res);
1766 return res;
1767}
1768
1769static PyObject*
1770treebuilder_close(TreeBuilderObject* self, PyObject* args)
1771{
1772 if (!PyArg_ParseTuple(args, ":close"))
1773 return NULL;
1774
1775 return treebuilder_done(self);
1776}
1777
1778static PyObject*
1779treebuilder_start(TreeBuilderObject* self, PyObject* args)
1780{
1781 PyObject* tag;
1782 PyObject* attrib = Py_None;
1783 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1784 return NULL;
1785
1786 return treebuilder_handle_start(self, tag, attrib);
1787}
1788
1789static PyObject*
1790treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1791{
1792 PyObject* encoding;
1793 PyObject* standalone;
1794 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1795 return NULL;
1796
1797 return treebuilder_handle_xml(self, encoding, standalone);
1798}
1799
1800static PyMethodDef treebuilder_methods[] = {
1801 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1802 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1803 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1804 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1805 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1806 {NULL, NULL}
1807};
1808
1809static PyObject*
1810treebuilder_getattr(TreeBuilderObject* self, char* name)
1811{
1812 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1813}
1814
Neal Norwitz227b5332006-03-22 09:28:35 +00001815static PyTypeObject TreeBuilder_Type = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001816 PyObject_HEAD_INIT(NULL)
1817 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
1818 /* methods */
1819 (destructor)treebuilder_dealloc, /* tp_dealloc */
1820 0, /* tp_print */
1821 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1822};
1823
1824/* ==================================================================== */
1825/* the expat interface */
1826
1827#if defined(USE_EXPAT)
1828
1829#include "expat.h"
1830
1831#if defined(USE_PYEXPAT_CAPI)
1832#include "pyexpat.h"
1833static struct PyExpat_CAPI* expat_capi;
1834#define EXPAT(func) (expat_capi->func)
1835#else
1836#define EXPAT(func) (XML_##func)
1837#endif
1838
1839typedef struct {
1840 PyObject_HEAD
1841
1842 XML_Parser parser;
1843
1844 PyObject* target;
1845 PyObject* entity;
1846
1847 PyObject* names;
1848
1849 PyObject* handle_xml;
1850 PyObject* handle_start;
1851 PyObject* handle_data;
1852 PyObject* handle_end;
1853
1854 PyObject* handle_comment;
1855 PyObject* handle_pi;
1856
1857} XMLParserObject;
1858
Neal Norwitz227b5332006-03-22 09:28:35 +00001859static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001860
1861/* helpers */
1862
#if defined(Py_USING_UNICODE)
/* Return 1 if any of the first size bytes of string has its high bit
   set (i.e. the string is not plain 7-bit ascii), 0 otherwise. */
LOCAL(int)
checkstring(const char* string, int size)
{
    int pos = 0;

    while (pos < size) {
        if (string[pos] & 0x80)
            return 1;
        pos++;
    }

    return 0;
}
#endif
1877
1878LOCAL(PyObject*)
1879makestring(const char* string, int size)
1880{
1881 /* convert a UTF-8 string to either a 7-bit ascii string or a
1882 Unicode string */
1883
1884#if defined(Py_USING_UNICODE)
1885 if (checkstring(string, size))
1886 return PyUnicode_DecodeUTF8(string, size, "strict");
1887#endif
1888
1889 return PyString_FromStringAndSize(string, size);
1890}
1891
1892LOCAL(PyObject*)
1893makeuniversal(XMLParserObject* self, const char* string)
1894{
1895 /* convert a UTF-8 tag/attribute name from the expat parser
1896 to a universal name string */
1897
1898 int size = strlen(string);
1899 PyObject* key;
1900 PyObject* value;
1901
1902 /* look the 'raw' name up in the names dictionary */
1903 key = PyString_FromStringAndSize(string, size);
1904 if (!key)
1905 return NULL;
1906
1907 value = PyDict_GetItem(self->names, key);
1908
1909 if (value) {
1910 Py_INCREF(value);
1911 } else {
1912 /* new name. convert to universal name, and decode as
1913 necessary */
1914
1915 PyObject* tag;
1916 char* p;
1917 int i;
1918
1919 /* look for namespace separator */
1920 for (i = 0; i < size; i++)
1921 if (string[i] == '}')
1922 break;
1923 if (i != size) {
1924 /* convert to universal name */
1925 tag = PyString_FromStringAndSize(NULL, size+1);
1926 p = PyString_AS_STRING(tag);
1927 p[0] = '{';
1928 memcpy(p+1, string, size);
1929 size++;
1930 } else {
1931 /* plain name; use key as tag */
1932 Py_INCREF(key);
1933 tag = key;
1934 }
1935
1936 /* decode universal name */
1937#if defined(Py_USING_UNICODE)
1938 /* inline makestring, to avoid duplicating the source string if
1939 it's not an utf-8 string */
1940 p = PyString_AS_STRING(tag);
1941 if (checkstring(p, size)) {
1942 value = PyUnicode_DecodeUTF8(p, size, "strict");
1943 Py_DECREF(tag);
1944 if (!value) {
1945 Py_DECREF(key);
1946 return NULL;
1947 }
1948 } else
1949#endif
1950 value = tag; /* use tag as is */
1951
1952 /* add to names dictionary */
1953 if (PyDict_SetItem(self->names, key, value) < 0) {
1954 Py_DECREF(key);
1955 Py_DECREF(value);
1956 return NULL;
1957 }
1958 }
1959
1960 Py_DECREF(key);
1961 return value;
1962}
1963
1964/* -------------------------------------------------------------------- */
1965/* handlers */
1966
1967static void
1968expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1969 int data_len)
1970{
1971 PyObject* key;
1972 PyObject* value;
1973 PyObject* res;
1974
1975 if (data_len < 2 || data_in[0] != '&')
1976 return;
1977
1978 key = makestring(data_in + 1, data_len - 2);
1979 if (!key)
1980 return;
1981
1982 value = PyDict_GetItem(self->entity, key);
1983
1984 if (value) {
1985 if (TreeBuilder_CheckExact(self->target))
1986 res = treebuilder_handle_data(
1987 (TreeBuilderObject*) self->target, value
1988 );
1989 else if (self->handle_data)
1990 res = PyObject_CallFunction(self->handle_data, "O", value);
1991 else
1992 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001993 Py_XDECREF(res);
1994 } else {
1995 PyErr_Format(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001996 PyExc_SyntaxError, "undefined entity &%s;: line %ld, column %ld",
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001997 PyString_AS_STRING(key),
1998 EXPAT(GetErrorLineNumber)(self->parser),
1999 EXPAT(GetErrorColumnNumber)(self->parser)
2000 );
2001 }
2002
2003 Py_DECREF(key);
2004}
2005
2006static void
2007expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2008 const XML_Char **attrib_in)
2009{
2010 PyObject* res;
2011 PyObject* tag;
2012 PyObject* attrib;
2013 int ok;
2014
2015 /* tag name */
2016 tag = makeuniversal(self, tag_in);
2017 if (!tag)
2018 return; /* parser will look for errors */
2019
2020 /* attributes */
2021 if (attrib_in[0]) {
2022 attrib = PyDict_New();
2023 if (!attrib)
2024 return;
2025 while (attrib_in[0] && attrib_in[1]) {
2026 PyObject* key = makeuniversal(self, attrib_in[0]);
2027 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2028 if (!key || !value) {
2029 Py_XDECREF(value);
2030 Py_XDECREF(key);
2031 Py_DECREF(attrib);
2032 return;
2033 }
2034 ok = PyDict_SetItem(attrib, key, value);
2035 Py_DECREF(value);
2036 Py_DECREF(key);
2037 if (ok < 0) {
2038 Py_DECREF(attrib);
2039 return;
2040 }
2041 attrib_in += 2;
2042 }
2043 } else {
2044 Py_INCREF(Py_None);
2045 attrib = Py_None;
2046 }
2047
2048 if (TreeBuilder_CheckExact(self->target))
2049 /* shortcut */
2050 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2051 tag, attrib);
2052 else if (self->handle_start)
2053 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2054 else
2055 res = NULL;
2056
2057 Py_DECREF(tag);
2058 Py_DECREF(attrib);
2059
2060 Py_XDECREF(res);
2061}
2062
2063static void
2064expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2065 int data_len)
2066{
2067 PyObject* data;
2068 PyObject* res;
2069
2070 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002071 if (!data)
2072 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002073
2074 if (TreeBuilder_CheckExact(self->target))
2075 /* shortcut */
2076 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2077 else if (self->handle_data)
2078 res = PyObject_CallFunction(self->handle_data, "O", data);
2079 else
2080 res = NULL;
2081
2082 Py_DECREF(data);
2083
2084 Py_XDECREF(res);
2085}
2086
2087static void
2088expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2089{
2090 PyObject* tag;
2091 PyObject* res = NULL;
2092
2093 if (TreeBuilder_CheckExact(self->target))
2094 /* shortcut */
2095 /* the standard tree builder doesn't look at the end tag */
2096 res = treebuilder_handle_end(
2097 (TreeBuilderObject*) self->target, Py_None
2098 );
2099 else if (self->handle_end) {
2100 tag = makeuniversal(self, tag_in);
2101 if (tag) {
2102 res = PyObject_CallFunction(self->handle_end, "O", tag);
2103 Py_DECREF(tag);
2104 }
2105 }
2106
2107 Py_XDECREF(res);
2108}
2109
2110static void
2111expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2112 const XML_Char *uri)
2113{
2114 treebuilder_handle_namespace(
2115 (TreeBuilderObject*) self->target, 1, prefix, uri
2116 );
2117}
2118
2119static void
2120expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2121{
2122 treebuilder_handle_namespace(
2123 (TreeBuilderObject*) self->target, 0, NULL, NULL
2124 );
2125}
2126
2127static void
2128expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2129{
2130 PyObject* comment;
2131 PyObject* res;
2132
2133 if (self->handle_comment) {
2134 comment = makestring(comment_in, strlen(comment_in));
2135 if (comment) {
2136 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2137 Py_XDECREF(res);
2138 Py_DECREF(comment);
2139 }
2140 }
2141}
2142
2143static void
2144expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2145 const XML_Char* data_in)
2146{
2147 PyObject* target;
2148 PyObject* data;
2149 PyObject* res;
2150
2151 if (self->handle_pi) {
2152 target = makestring(target_in, strlen(target_in));
2153 data = makestring(data_in, strlen(data_in));
2154 if (target && data) {
2155 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2156 Py_XDECREF(res);
2157 Py_DECREF(data);
2158 Py_DECREF(target);
2159 } else {
2160 Py_XDECREF(data);
2161 Py_XDECREF(target);
2162 }
2163 }
2164}
2165
#if defined(Py_USING_UNICODE)
/* expat unknown-encoding handler: builds a byte-to-character map for
   the encoding called name by decoding all 256 byte values with the
   Python codec of that name.  Bytes that decode to the Unicode
   replacement character are mapped to -1.

   Returns XML_STATUS_OK on success, XML_STATUS_ERROR if the decode
   fails or does not produce exactly 256 characters. */
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    unsigned char bytes[256];
    PyObject* decoded;
    Py_UNICODE* chars;
    int code;

    memset(info, 0, sizeof(XML_Encoding));

    /* one byte for every possible value, in order */
    for (code = 0; code < 256; code++)
        bytes[code] = code;

    decoded = PyUnicode_Decode((char*) bytes, 256, name, "replace");
    if (!decoded)
        return XML_STATUS_ERROR;

    /* the map needs exactly one character per byte */
    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (code = 0; code < 256; code++) {
        if (chars[code] == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[code] = -1;
        else
            info->map[code] = chars[code];
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2204
2205/* -------------------------------------------------------------------- */
2206/* constructor and destructor */
2207
2208static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002209xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002210{
2211 XMLParserObject* self;
2212 /* FIXME: does this need to be static? */
2213 static XML_Memory_Handling_Suite memory_handler;
2214
2215 PyObject* target = NULL;
2216 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002217 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002218 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2219 &target, &encoding))
2220 return NULL;
2221
2222#if defined(USE_PYEXPAT_CAPI)
2223 if (!expat_capi) {
2224 PyErr_SetString(
2225 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2226 );
2227 return NULL;
2228 }
2229#endif
2230
2231 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2232 if (self == NULL)
2233 return NULL;
2234
2235 self->entity = PyDict_New();
2236 if (!self->entity) {
2237 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002238 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002239 }
2240
2241 self->names = PyDict_New();
2242 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002243 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002244 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002245 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002246 }
2247
2248 memory_handler.malloc_fcn = PyObject_Malloc;
2249 memory_handler.realloc_fcn = PyObject_Realloc;
2250 memory_handler.free_fcn = PyObject_Free;
2251
2252 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2253 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002254 PyObject_Del(self->names);
2255 PyObject_Del(self->entity);
2256 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002257 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002258 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002259 }
2260
2261 /* setup target handlers */
2262 if (!target) {
2263 target = treebuilder_new();
2264 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002265 EXPAT(ParserFree)(self->parser);
2266 PyObject_Del(self->names);
2267 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002268 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002269 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002270 }
2271 } else
2272 Py_INCREF(target);
2273 self->target = target;
2274
2275 self->handle_xml = PyObject_GetAttrString(target, "xml");
2276 self->handle_start = PyObject_GetAttrString(target, "start");
2277 self->handle_data = PyObject_GetAttrString(target, "data");
2278 self->handle_end = PyObject_GetAttrString(target, "end");
2279 self->handle_comment = PyObject_GetAttrString(target, "comment");
2280 self->handle_pi = PyObject_GetAttrString(target, "pi");
2281
2282 PyErr_Clear();
2283
2284 /* configure parser */
2285 EXPAT(SetUserData)(self->parser, self);
2286 EXPAT(SetElementHandler)(
2287 self->parser,
2288 (XML_StartElementHandler) expat_start_handler,
2289 (XML_EndElementHandler) expat_end_handler
2290 );
2291 EXPAT(SetDefaultHandlerExpand)(
2292 self->parser,
2293 (XML_DefaultHandler) expat_default_handler
2294 );
2295 EXPAT(SetCharacterDataHandler)(
2296 self->parser,
2297 (XML_CharacterDataHandler) expat_data_handler
2298 );
2299 if (self->handle_comment)
2300 EXPAT(SetCommentHandler)(
2301 self->parser,
2302 (XML_CommentHandler) expat_comment_handler
2303 );
2304 if (self->handle_pi)
2305 EXPAT(SetProcessingInstructionHandler)(
2306 self->parser,
2307 (XML_ProcessingInstructionHandler) expat_pi_handler
2308 );
2309#if defined(Py_USING_UNICODE)
2310 EXPAT(SetUnknownEncodingHandler)(
2311 self->parser,
2312 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2313 );
2314#endif
2315
2316 ALLOC(sizeof(XMLParserObject), "create expatparser");
2317
2318 return (PyObject*) self;
2319}
2320
2321static void
2322xmlparser_dealloc(XMLParserObject* self)
2323{
2324 EXPAT(ParserFree)(self->parser);
2325
2326 Py_XDECREF(self->handle_pi);
2327 Py_XDECREF(self->handle_comment);
2328 Py_XDECREF(self->handle_end);
2329 Py_XDECREF(self->handle_data);
2330 Py_XDECREF(self->handle_start);
2331 Py_XDECREF(self->handle_xml);
2332
2333 Py_DECREF(self->target);
2334 Py_DECREF(self->entity);
2335 Py_DECREF(self->names);
2336
2337 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2338
2339 PyObject_Del(self);
2340}
2341
2342/* -------------------------------------------------------------------- */
2343/* methods (in alphabetical order) */
2344
2345LOCAL(PyObject*)
2346expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2347{
2348 int ok;
2349
2350 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2351
2352 if (PyErr_Occurred())
2353 return NULL;
2354
2355 if (!ok) {
2356 PyErr_Format(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002357 PyExc_SyntaxError, "%s: line %ld, column %ld",
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002358 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2359 EXPAT(GetErrorLineNumber)(self->parser),
2360 EXPAT(GetErrorColumnNumber)(self->parser)
2361 );
2362 return NULL;
2363 }
2364
2365 Py_RETURN_NONE;
2366}
2367
2368static PyObject*
2369xmlparser_close(XMLParserObject* self, PyObject* args)
2370{
2371 /* end feeding data to parser */
2372
2373 PyObject* res;
2374 if (!PyArg_ParseTuple(args, ":close"))
2375 return NULL;
2376
2377 res = expat_parse(self, "", 0, 1);
2378
2379 if (res && TreeBuilder_CheckExact(self->target)) {
2380 Py_DECREF(res);
2381 return treebuilder_done((TreeBuilderObject*) self->target);
2382 }
2383
2384 return res;
2385}
2386
2387static PyObject*
2388xmlparser_feed(XMLParserObject* self, PyObject* args)
2389{
2390 /* feed data to parser */
2391
2392 char* data;
2393 int data_len;
2394 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2395 return NULL;
2396
2397 return expat_parse(self, data, data_len, 0);
2398}
2399
2400static PyObject*
2401xmlparser_parse(XMLParserObject* self, PyObject* args)
2402{
2403 /* (internal) parse until end of input stream */
2404
2405 PyObject* reader;
2406 PyObject* buffer;
2407 PyObject* res;
2408
2409 PyObject* fileobj;
2410 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2411 return NULL;
2412
2413 reader = PyObject_GetAttrString(fileobj, "read");
2414 if (!reader)
2415 return NULL;
2416
2417 /* read from open file object */
2418 for (;;) {
2419
2420 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2421
2422 if (!buffer) {
2423 /* read failed (e.g. due to KeyboardInterrupt) */
2424 Py_DECREF(reader);
2425 return NULL;
2426 }
2427
2428 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2429 Py_DECREF(buffer);
2430 break;
2431 }
2432
2433 res = expat_parse(
2434 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2435 );
2436
2437 Py_DECREF(buffer);
2438
2439 if (!res) {
2440 Py_DECREF(reader);
2441 return NULL;
2442 }
2443 Py_DECREF(res);
2444
2445 }
2446
2447 Py_DECREF(reader);
2448
2449 res = expat_parse(self, "", 0, 1);
2450
2451 if (res && TreeBuilder_CheckExact(self->target)) {
2452 Py_DECREF(res);
2453 return treebuilder_done((TreeBuilderObject*) self->target);
2454 }
2455
2456 return res;
2457}
2458
2459static PyObject*
2460xmlparser_setevents(XMLParserObject* self, PyObject* args)
2461{
2462 /* activate element event reporting */
2463
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002464 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002465 TreeBuilderObject* target;
2466
2467 PyObject* events; /* event collector */
2468 PyObject* event_set = Py_None;
2469 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2470 &event_set))
2471 return NULL;
2472
2473 if (!TreeBuilder_CheckExact(self->target)) {
2474 PyErr_SetString(
2475 PyExc_TypeError,
2476 "event handling only supported for cElementTree.Treebuilder "
2477 "targets"
2478 );
2479 return NULL;
2480 }
2481
2482 target = (TreeBuilderObject*) self->target;
2483
2484 Py_INCREF(events);
2485 Py_XDECREF(target->events);
2486 target->events = events;
2487
2488 /* clear out existing events */
2489 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2490 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2491 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2492 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2493
2494 if (event_set == Py_None) {
2495 /* default is "end" only */
2496 target->end_event_obj = PyString_FromString("end");
2497 Py_RETURN_NONE;
2498 }
2499
2500 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2501 goto error;
2502
2503 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2504 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2505 char* event;
2506 if (!PyString_Check(item))
2507 goto error;
2508 event = PyString_AS_STRING(item);
2509 if (strcmp(event, "start") == 0) {
2510 Py_INCREF(item);
2511 target->start_event_obj = item;
2512 } else if (strcmp(event, "end") == 0) {
2513 Py_INCREF(item);
2514 Py_XDECREF(target->end_event_obj);
2515 target->end_event_obj = item;
2516 } else if (strcmp(event, "start-ns") == 0) {
2517 Py_INCREF(item);
2518 Py_XDECREF(target->start_ns_event_obj);
2519 target->start_ns_event_obj = item;
2520 EXPAT(SetNamespaceDeclHandler)(
2521 self->parser,
2522 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2523 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2524 );
2525 } else if (strcmp(event, "end-ns") == 0) {
2526 Py_INCREF(item);
2527 Py_XDECREF(target->end_ns_event_obj);
2528 target->end_ns_event_obj = item;
2529 EXPAT(SetNamespaceDeclHandler)(
2530 self->parser,
2531 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2532 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2533 );
2534 } else {
2535 PyErr_Format(
2536 PyExc_ValueError,
2537 "unknown event '%s'", event
2538 );
2539 return NULL;
2540 }
2541 }
2542
2543 Py_RETURN_NONE;
2544
2545 error:
2546 PyErr_SetString(
2547 PyExc_TypeError,
2548 "invalid event tuple"
2549 );
2550 return NULL;
2551}
2552
2553static PyMethodDef xmlparser_methods[] = {
2554 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2555 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2556 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2557 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2558 {NULL, NULL}
2559};
2560
2561static PyObject*
2562xmlparser_getattr(XMLParserObject* self, char* name)
2563{
2564 PyObject* res;
2565
2566 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2567 if (res)
2568 return res;
2569
2570 PyErr_Clear();
2571
2572 if (strcmp(name, "entity") == 0)
2573 res = self->entity;
2574 else if (strcmp(name, "target") == 0)
2575 res = self->target;
2576 else if (strcmp(name, "version") == 0) {
2577 char buffer[100];
2578 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2579 XML_MINOR_VERSION, XML_MICRO_VERSION);
2580 return PyString_FromString(buffer);
2581 } else {
2582 PyErr_SetString(PyExc_AttributeError, name);
2583 return NULL;
2584 }
2585
2586 Py_INCREF(res);
2587 return res;
2588}
2589
Neal Norwitz227b5332006-03-22 09:28:35 +00002590static PyTypeObject XMLParser_Type = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002591 PyObject_HEAD_INIT(NULL)
2592 0, "XMLParser", sizeof(XMLParserObject), 0,
2593 /* methods */
2594 (destructor)xmlparser_dealloc, /* tp_dealloc */
2595 0, /* tp_print */
2596 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2597};
2598
2599#endif
2600
2601/* ==================================================================== */
2602/* python module interface */
2603
2604static PyMethodDef _functions[] = {
2605 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2606 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2607 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2608#if defined(USE_EXPAT)
2609 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2610 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2611#endif
2612 {NULL, NULL}
2613};
2614
Neal Norwitzf6657e62006-12-28 04:47:50 +00002615PyMODINIT_FUNC
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002616init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002617{
2618 PyObject* m;
2619 PyObject* g;
2620 char* bootstrap;
2621#if defined(USE_PYEXPAT_CAPI)
2622 struct PyExpat_CAPI* capi;
2623#endif
2624
2625 /* Patch object type */
2626 Element_Type.ob_type = TreeBuilder_Type.ob_type = &PyType_Type;
2627#if defined(USE_EXPAT)
2628 XMLParser_Type.ob_type = &PyType_Type;
2629#endif
2630
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002631 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002632 if (!m)
2633 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002634
2635 /* python glue code */
2636
2637 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002638 if (!g)
2639 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002640
2641 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2642
2643 bootstrap = (
2644
2645#if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2646 "from __future__ import generators\n" /* enable yield under 2.2 */
2647#endif
2648
2649 "from copy import copy, deepcopy\n"
2650
2651 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002652 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002653 "except ImportError:\n"
2654 " import ElementTree\n"
2655 "ET = ElementTree\n"
2656 "del ElementTree\n"
2657
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002658 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002659
2660 "try:\n" /* check if copy works as is */
2661 " copy(cElementTree.Element('x'))\n"
2662 "except:\n"
2663 " def copyelement(elem):\n"
2664 " return elem\n"
2665
2666 "def Comment(text=None):\n" /* public */
2667 " element = cElementTree.Element(ET.Comment)\n"
2668 " element.text = text\n"
2669 " return element\n"
2670 "cElementTree.Comment = Comment\n"
2671
2672 "class ElementTree(ET.ElementTree):\n" /* public */
2673 " def parse(self, source, parser=None):\n"
2674 " if not hasattr(source, 'read'):\n"
2675 " source = open(source, 'rb')\n"
2676 " if parser is not None:\n"
2677 " while 1:\n"
2678 " data = source.read(65536)\n"
2679 " if not data:\n"
2680 " break\n"
2681 " parser.feed(data)\n"
2682 " self._root = parser.close()\n"
2683 " else:\n"
2684 " parser = cElementTree.XMLParser()\n"
2685 " self._root = parser._parse(source)\n"
2686 " return self._root\n"
2687 "cElementTree.ElementTree = ElementTree\n"
2688
2689 "def getiterator(node, tag=None):\n" /* helper */
2690 " if tag == '*':\n"
2691 " tag = None\n"
2692#if (PY_VERSION_HEX < 0x02020000)
2693 " nodes = []\n" /* 2.1 doesn't have yield */
2694 " if tag is None or node.tag == tag:\n"
2695 " nodes.append(node)\n"
2696 " for node in node:\n"
2697 " nodes.extend(getiterator(node, tag))\n"
2698 " return nodes\n"
2699#else
2700 " if tag is None or node.tag == tag:\n"
2701 " yield node\n"
2702 " for node in node:\n"
2703 " for node in getiterator(node, tag):\n"
2704 " yield node\n"
2705#endif
2706
2707 "def parse(source, parser=None):\n" /* public */
2708 " tree = ElementTree()\n"
2709 " tree.parse(source, parser)\n"
2710 " return tree\n"
2711 "cElementTree.parse = parse\n"
2712
2713#if (PY_VERSION_HEX < 0x02020000)
2714 "if hasattr(ET, 'iterparse'):\n"
2715 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2716#else
2717 "class iterparse(object):\n"
2718 " root = None\n"
2719 " def __init__(self, file, events=None):\n"
2720 " if not hasattr(file, 'read'):\n"
2721 " file = open(file, 'rb')\n"
2722 " self._file = file\n"
2723 " self._events = events\n"
2724 " def __iter__(self):\n"
2725 " events = []\n"
2726 " b = cElementTree.TreeBuilder()\n"
2727 " p = cElementTree.XMLParser(b)\n"
2728 " p._setevents(events, self._events)\n"
2729 " while 1:\n"
2730 " data = self._file.read(16384)\n"
2731 " if not data:\n"
2732 " break\n"
2733 " p.feed(data)\n"
2734 " for event in events:\n"
2735 " yield event\n"
2736 " del events[:]\n"
2737 " root = p.close()\n"
2738 " for event in events:\n"
2739 " yield event\n"
2740 " self.root = root\n"
2741 "cElementTree.iterparse = iterparse\n"
2742#endif
2743
2744 "def PI(target, text=None):\n" /* public */
2745 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2746 " element.text = target\n"
2747 " if text:\n"
2748 " element.text = element.text + ' ' + text\n"
2749 " return element\n"
2750
2751 " elem = cElementTree.Element(ET.PI)\n"
2752 " elem.text = text\n"
2753 " return elem\n"
2754 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2755
2756 "def XML(text):\n" /* public */
2757 " parser = cElementTree.XMLParser()\n"
2758 " parser.feed(text)\n"
2759 " return parser.close()\n"
2760 "cElementTree.XML = cElementTree.fromstring = XML\n"
2761
2762 "def XMLID(text):\n" /* public */
2763 " tree = XML(text)\n"
2764 " ids = {}\n"
2765 " for elem in tree.getiterator():\n"
2766 " id = elem.get('id')\n"
2767 " if id:\n"
2768 " ids[id] = elem\n"
2769 " return tree, ids\n"
2770 "cElementTree.XMLID = XMLID\n"
2771
2772 "cElementTree.dump = ET.dump\n"
2773 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2774 "cElementTree.iselement = ET.iselement\n"
2775 "cElementTree.QName = ET.QName\n"
2776 "cElementTree.tostring = ET.tostring\n"
2777 "cElementTree.VERSION = '" VERSION "'\n"
2778 "cElementTree.__version__ = '" VERSION "'\n"
2779 "cElementTree.XMLParserError = SyntaxError\n"
2780
2781 );
2782
2783 PyRun_String(bootstrap, Py_file_input, g, NULL);
2784
2785 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2786
2787 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2788 if (elementtree_copyelement_obj) {
2789 /* reduce hack needed; enable reduce method */
2790 PyMethodDef* mp;
2791 for (mp = element_methods; mp->ml_name; mp++)
2792 if (mp->ml_meth == (PyCFunction) element_reduce) {
2793 mp->ml_name = "__reduce__";
2794 break;
2795 }
2796 } else
2797 PyErr_Clear();
2798 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2799 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2800
2801#if defined(USE_PYEXPAT_CAPI)
2802 /* link against pyexpat, if possible */
2803 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2804 if (capi &&
2805 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2806 capi->size <= sizeof(*expat_capi) &&
2807 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2808 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2809 capi->MICRO_VERSION == XML_MICRO_VERSION)
2810 expat_capi = capi;
2811 else
2812 expat_capi = NULL;
2813#endif
2814
2815}