blob: 884e50d077a921cc5509996a426249c1a2e6b8c5 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000052#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000053
54/* -------------------------------------------------------------------- */
55/* configuration */
56
57/* Leave defined to include the expat-based XMLParser type */
58#define USE_EXPAT
59
Florent Xiclunaf15351d2010-03-13 23:24:31 +000060/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* #define USE_PYEXPAT_CAPI */
62
63/* An element can hold this many children without extra memory
64 allocations. */
65#define STATIC_CHILDREN 4
66
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
73 eight bytes. For the current version of cElementTree, this means
74 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
79#if 0
80static int memory = 0;
81#define ALLOC(size, comment)\
82do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
83#define RELEASE(size, comment)\
84do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
85#else
86#define ALLOC(size, comment)
87#define RELEASE(size, comment)
88#endif
89
90/* compiler tweaks */
91#if defined(_MSC_VER)
92#define LOCAL(type) static __inline type __fastcall
93#else
94#define LOCAL(type) static type
95#endif
96
Florent Xiclunaf15351d2010-03-13 23:24:31 +000097/* compatibility macros */
98#if (PY_VERSION_HEX < 0x02060000)
99#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
100#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
101#endif
102
103#if (PY_VERSION_HEX < 0x02050000)
104typedef int Py_ssize_t;
105#define lenfunc inquiry
106#endif
107
108#if (PY_VERSION_HEX < 0x02040000)
109#define PyDict_CheckExact PyDict_Check
110
111#if !defined(Py_RETURN_NONE)
112#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
113#endif
114#endif
115
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000116/* macros used to store 'join' flags in string object pointers. note
117 that all use of text and tail as object pointers must be wrapped in
118 JOIN_OBJ. see comments in the ElementObject definition for more
119 info. */
120#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
121#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
122#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
123
124/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000125static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000128static PyObject* elementtree_iter_obj;
129static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130static PyObject* elementpath_obj;
131
132/* helpers */
133
134LOCAL(PyObject*)
135deepcopy(PyObject* object, PyObject* memo)
136{
137 /* do a deep copy of the given object */
138
139 PyObject* args;
140 PyObject* result;
141
142 if (!elementtree_deepcopy_obj) {
143 PyErr_SetString(
144 PyExc_RuntimeError,
145 "deepcopy helper not found"
146 );
147 return NULL;
148 }
149
150 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000151 if (!args)
152 return NULL;
153
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
155 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156
157 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158
159 Py_DECREF(args);
160
161 return result;
162}
163
164LOCAL(PyObject*)
165list_join(PyObject* list)
166{
167 /* join list elements (destroying the list in the process) */
168
169 PyObject* joiner;
170 PyObject* function;
171 PyObject* args;
172 PyObject* result;
173
174 switch (PyList_GET_SIZE(list)) {
175 case 0:
176 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000177 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000178 case 1:
179 result = PyList_GET_ITEM(list, 0);
180 Py_INCREF(result);
181 Py_DECREF(list);
182 return result;
183 }
184
185 /* two or more elements: slice out a suitable separator from the
186 first member, and use that to join the entire list */
187
188 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
189 if (!joiner)
190 return NULL;
191
192 function = PyObject_GetAttrString(joiner, "join");
193 if (!function) {
194 Py_DECREF(joiner);
195 return NULL;
196 }
197
198 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000199 if (!args)
200 return NULL;
201
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202 PyTuple_SET_ITEM(args, 0, list);
203
204 result = PyObject_CallObject(function, args);
205
206 Py_DECREF(args); /* also removes list */
207 Py_DECREF(function);
208 Py_DECREF(joiner);
209
210 return result;
211}
212
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000213/* -------------------------------------------------------------------- */
214/* the element type */
215
216typedef struct {
217
218 /* attributes (a dictionary object), or None if no attributes */
219 PyObject* attrib;
220
221 /* child elements */
222 int length; /* actual number of items */
223 int allocated; /* allocated items */
224
225 /* this either points to _children or to a malloced buffer */
226 PyObject* *children;
227
228 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100229
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230} ElementObjectExtra;
231
232typedef struct {
233 PyObject_HEAD
234
235 /* element tag (a string). */
236 PyObject* tag;
237
238 /* text before first child. note that this is a tagged pointer;
239 use JOIN_OBJ to get the object pointer. the join flag is used
240 to distinguish lists created by the tree builder from lists
241 assigned to the attribute by application code; the former
242 should be joined before being returned to the user, the latter
243 should be left intact. */
244 PyObject* text;
245
246 /* text after this element, in parent. note that this is a tagged
247 pointer; use JOIN_OBJ to get the object pointer. */
248 PyObject* tail;
249
250 ElementObjectExtra* extra;
251
252} ElementObject;
253
Neal Norwitz227b5332006-03-22 09:28:35 +0000254static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000255
Christian Heimes90aa7642007-12-19 02:45:37 +0000256#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000257
258/* -------------------------------------------------------------------- */
259/* element constructor and destructor */
260
261LOCAL(int)
262element_new_extra(ElementObject* self, PyObject* attrib)
263{
264 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
265 if (!self->extra)
266 return -1;
267
268 if (!attrib)
269 attrib = Py_None;
270
271 Py_INCREF(attrib);
272 self->extra->attrib = attrib;
273
274 self->extra->length = 0;
275 self->extra->allocated = STATIC_CHILDREN;
276 self->extra->children = self->extra->_children;
277
278 return 0;
279}
280
281LOCAL(void)
282element_dealloc_extra(ElementObject* self)
283{
284 int i;
285
286 Py_DECREF(self->extra->attrib);
287
288 for (i = 0; i < self->extra->length; i++)
289 Py_DECREF(self->extra->children[i]);
290
291 if (self->extra->children != self->extra->_children)
292 PyObject_Free(self->extra->children);
293
294 PyObject_Free(self->extra);
295}
296
297LOCAL(PyObject*)
298element_new(PyObject* tag, PyObject* attrib)
299{
300 ElementObject* self;
301
302 self = PyObject_New(ElementObject, &Element_Type);
303 if (self == NULL)
304 return NULL;
305
306 /* use None for empty dictionaries */
307 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
308 attrib = Py_None;
309
310 self->extra = NULL;
311
312 if (attrib != Py_None) {
313
Thomas Wouters477c8d52006-05-27 19:21:47 +0000314 if (element_new_extra(self, attrib) < 0) {
315 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000316 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000317 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000318
319 self->extra->length = 0;
320 self->extra->allocated = STATIC_CHILDREN;
321 self->extra->children = self->extra->_children;
322
323 }
324
325 Py_INCREF(tag);
326 self->tag = tag;
327
328 Py_INCREF(Py_None);
329 self->text = Py_None;
330
331 Py_INCREF(Py_None);
332 self->tail = Py_None;
333
334 ALLOC(sizeof(ElementObject), "create element");
335
336 return (PyObject*) self;
337}
338
339LOCAL(int)
340element_resize(ElementObject* self, int extra)
341{
342 int size;
343 PyObject* *children;
344
345 /* make sure self->children can hold the given number of extra
346 elements. set an exception and return -1 if allocation failed */
347
348 if (!self->extra)
349 element_new_extra(self, NULL);
350
351 size = self->extra->length + extra;
352
353 if (size > self->extra->allocated) {
354 /* use Python 2.4's list growth strategy */
355 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000356 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100357 * which needs at least 4 bytes.
358 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000359 * be safe.
360 */
361 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000362 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000363 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100364 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000365 * false alarm always assume at least one child to be safe.
366 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000367 children = PyObject_Realloc(self->extra->children,
368 size * sizeof(PyObject*));
369 if (!children)
370 goto nomemory;
371 } else {
372 children = PyObject_Malloc(size * sizeof(PyObject*));
373 if (!children)
374 goto nomemory;
375 /* copy existing children from static area to malloc buffer */
376 memcpy(children, self->extra->children,
377 self->extra->length * sizeof(PyObject*));
378 }
379 self->extra->children = children;
380 self->extra->allocated = size;
381 }
382
383 return 0;
384
385 nomemory:
386 PyErr_NoMemory();
387 return -1;
388}
389
390LOCAL(int)
391element_add_subelement(ElementObject* self, PyObject* element)
392{
393 /* add a child element to a parent */
394
395 if (element_resize(self, 1) < 0)
396 return -1;
397
398 Py_INCREF(element);
399 self->extra->children[self->extra->length] = element;
400
401 self->extra->length++;
402
403 return 0;
404}
405
406LOCAL(PyObject*)
407element_get_attrib(ElementObject* self)
408{
409 /* return borrowed reference to attrib dictionary */
410 /* note: this function assumes that the extra section exists */
411
412 PyObject* res = self->extra->attrib;
413
414 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000415 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000416 /* create missing dictionary */
417 res = PyDict_New();
418 if (!res)
419 return NULL;
420 self->extra->attrib = res;
421 }
422
423 return res;
424}
425
426LOCAL(PyObject*)
427element_get_text(ElementObject* self)
428{
429 /* return borrowed reference to text attribute */
430
431 PyObject* res = self->text;
432
433 if (JOIN_GET(res)) {
434 res = JOIN_OBJ(res);
435 if (PyList_CheckExact(res)) {
436 res = list_join(res);
437 if (!res)
438 return NULL;
439 self->text = res;
440 }
441 }
442
443 return res;
444}
445
446LOCAL(PyObject*)
447element_get_tail(ElementObject* self)
448{
449 /* return borrowed reference to text attribute */
450
451 PyObject* res = self->tail;
452
453 if (JOIN_GET(res)) {
454 res = JOIN_OBJ(res);
455 if (PyList_CheckExact(res)) {
456 res = list_join(res);
457 if (!res)
458 return NULL;
459 self->tail = res;
460 }
461 }
462
463 return res;
464}
465
466static PyObject*
467element(PyObject* self, PyObject* args, PyObject* kw)
468{
469 PyObject* elem;
470
471 PyObject* tag;
472 PyObject* attrib = NULL;
473 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
474 &PyDict_Type, &attrib))
475 return NULL;
476
477 if (attrib || kw) {
478 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
479 if (!attrib)
480 return NULL;
481 if (kw)
482 PyDict_Update(attrib, kw);
483 } else {
484 Py_INCREF(Py_None);
485 attrib = Py_None;
486 }
487
488 elem = element_new(tag, attrib);
489
490 Py_DECREF(attrib);
491
492 return elem;
493}
494
495static PyObject*
496subelement(PyObject* self, PyObject* args, PyObject* kw)
497{
498 PyObject* elem;
499
500 ElementObject* parent;
501 PyObject* tag;
502 PyObject* attrib = NULL;
503 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
504 &Element_Type, &parent, &tag,
505 &PyDict_Type, &attrib))
506 return NULL;
507
508 if (attrib || kw) {
509 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
510 if (!attrib)
511 return NULL;
512 if (kw)
513 PyDict_Update(attrib, kw);
514 } else {
515 Py_INCREF(Py_None);
516 attrib = Py_None;
517 }
518
519 elem = element_new(tag, attrib);
520
521 Py_DECREF(attrib);
522
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000523 if (element_add_subelement(parent, elem) < 0) {
524 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000525 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000526 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000527
528 return elem;
529}
530
531static void
532element_dealloc(ElementObject* self)
533{
534 if (self->extra)
535 element_dealloc_extra(self);
536
537 /* discard attributes */
538 Py_DECREF(self->tag);
539 Py_DECREF(JOIN_OBJ(self->text));
540 Py_DECREF(JOIN_OBJ(self->tail));
541
542 RELEASE(sizeof(ElementObject), "destroy element");
543
544 PyObject_Del(self);
545}
546
547/* -------------------------------------------------------------------- */
548/* methods (in alphabetical order) */
549
550static PyObject*
551element_append(ElementObject* self, PyObject* args)
552{
553 PyObject* element;
554 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
555 return NULL;
556
557 if (element_add_subelement(self, element) < 0)
558 return NULL;
559
560 Py_RETURN_NONE;
561}
562
563static PyObject*
564element_clear(ElementObject* self, PyObject* args)
565{
566 if (!PyArg_ParseTuple(args, ":clear"))
567 return NULL;
568
569 if (self->extra) {
570 element_dealloc_extra(self);
571 self->extra = NULL;
572 }
573
574 Py_INCREF(Py_None);
575 Py_DECREF(JOIN_OBJ(self->text));
576 self->text = Py_None;
577
578 Py_INCREF(Py_None);
579 Py_DECREF(JOIN_OBJ(self->tail));
580 self->tail = Py_None;
581
582 Py_RETURN_NONE;
583}
584
585static PyObject*
586element_copy(ElementObject* self, PyObject* args)
587{
588 int i;
589 ElementObject* element;
590
591 if (!PyArg_ParseTuple(args, ":__copy__"))
592 return NULL;
593
594 element = (ElementObject*) element_new(
595 self->tag, (self->extra) ? self->extra->attrib : Py_None
596 );
597 if (!element)
598 return NULL;
599
600 Py_DECREF(JOIN_OBJ(element->text));
601 element->text = self->text;
602 Py_INCREF(JOIN_OBJ(element->text));
603
604 Py_DECREF(JOIN_OBJ(element->tail));
605 element->tail = self->tail;
606 Py_INCREF(JOIN_OBJ(element->tail));
607
608 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100609
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000610 if (element_resize(element, self->extra->length) < 0) {
611 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000612 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000613 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614
615 for (i = 0; i < self->extra->length; i++) {
616 Py_INCREF(self->extra->children[i]);
617 element->extra->children[i] = self->extra->children[i];
618 }
619
620 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100621
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000622 }
623
624 return (PyObject*) element;
625}
626
627static PyObject*
628element_deepcopy(ElementObject* self, PyObject* args)
629{
630 int i;
631 ElementObject* element;
632 PyObject* tag;
633 PyObject* attrib;
634 PyObject* text;
635 PyObject* tail;
636 PyObject* id;
637
638 PyObject* memo;
639 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
640 return NULL;
641
642 tag = deepcopy(self->tag, memo);
643 if (!tag)
644 return NULL;
645
646 if (self->extra) {
647 attrib = deepcopy(self->extra->attrib, memo);
648 if (!attrib) {
649 Py_DECREF(tag);
650 return NULL;
651 }
652 } else {
653 Py_INCREF(Py_None);
654 attrib = Py_None;
655 }
656
657 element = (ElementObject*) element_new(tag, attrib);
658
659 Py_DECREF(tag);
660 Py_DECREF(attrib);
661
662 if (!element)
663 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100664
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665 text = deepcopy(JOIN_OBJ(self->text), memo);
666 if (!text)
667 goto error;
668 Py_DECREF(element->text);
669 element->text = JOIN_SET(text, JOIN_GET(self->text));
670
671 tail = deepcopy(JOIN_OBJ(self->tail), memo);
672 if (!tail)
673 goto error;
674 Py_DECREF(element->tail);
675 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
676
677 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100678
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000679 if (element_resize(element, self->extra->length) < 0)
680 goto error;
681
682 for (i = 0; i < self->extra->length; i++) {
683 PyObject* child = deepcopy(self->extra->children[i], memo);
684 if (!child) {
685 element->extra->length = i;
686 goto error;
687 }
688 element->extra->children[i] = child;
689 }
690
691 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100692
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693 }
694
695 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000696 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000697 if (!id)
698 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699
700 i = PyDict_SetItem(memo, id, (PyObject*) element);
701
702 Py_DECREF(id);
703
704 if (i < 0)
705 goto error;
706
707 return (PyObject*) element;
708
709 error:
710 Py_DECREF(element);
711 return NULL;
712}
713
714LOCAL(int)
715checkpath(PyObject* tag)
716{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000717 Py_ssize_t i;
718 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 /* check if a tag contains an xpath character */
721
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000722#define PATHCHAR(ch) \
723 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000724
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200726 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
727 void *data = PyUnicode_DATA(tag);
728 unsigned int kind = PyUnicode_KIND(tag);
729 for (i = 0; i < len; i++) {
730 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
731 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000732 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200733 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000734 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200735 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000736 return 1;
737 }
738 return 0;
739 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000740 if (PyBytes_Check(tag)) {
741 char *p = PyBytes_AS_STRING(tag);
742 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743 if (p[i] == '{')
744 check = 0;
745 else if (p[i] == '}')
746 check = 1;
747 else if (check && PATHCHAR(p[i]))
748 return 1;
749 }
750 return 0;
751 }
752
753 return 1; /* unknown type; might be path expression */
754}
755
756static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000757element_extend(ElementObject* self, PyObject* args)
758{
759 PyObject* seq;
760 Py_ssize_t i, seqlen = 0;
761
762 PyObject* seq_in;
763 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
764 return NULL;
765
766 seq = PySequence_Fast(seq_in, "");
767 if (!seq) {
768 PyErr_Format(
769 PyExc_TypeError,
770 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
771 );
772 return NULL;
773 }
774
775 seqlen = PySequence_Size(seq);
776 for (i = 0; i < seqlen; i++) {
777 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
778 if (element_add_subelement(self, element) < 0) {
779 Py_DECREF(seq);
780 return NULL;
781 }
782 }
783
784 Py_DECREF(seq);
785
786 Py_RETURN_NONE;
787}
788
789static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000790element_find(ElementObject* self, PyObject* args)
791{
792 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000793 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000794 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200795
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000796 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000797 return NULL;
798
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200799 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200800 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200801 return _PyObject_CallMethodId(
802 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000803 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200804 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805
806 if (!self->extra)
807 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100808
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000809 for (i = 0; i < self->extra->length; i++) {
810 PyObject* item = self->extra->children[i];
811 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000812 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000813 Py_INCREF(item);
814 return item;
815 }
816 }
817
818 Py_RETURN_NONE;
819}
820
821static PyObject*
822element_findtext(ElementObject* self, PyObject* args)
823{
824 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000825 PyObject* tag;
826 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000827 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200828 _Py_IDENTIFIER(findtext);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200829
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000830 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000831 return NULL;
832
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000833 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200834 return _PyObject_CallMethodId(
835 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000836 );
837
838 if (!self->extra) {
839 Py_INCREF(default_value);
840 return default_value;
841 }
842
843 for (i = 0; i < self->extra->length; i++) {
844 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000845 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
846
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000847 PyObject* text = element_get_text(item);
848 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000849 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000850 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000851 return text;
852 }
853 }
854
855 Py_INCREF(default_value);
856 return default_value;
857}
858
859static PyObject*
860element_findall(ElementObject* self, PyObject* args)
861{
862 int i;
863 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000864 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000865 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200866
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000867 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000868 return NULL;
869
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200870 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200871 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200872 return _PyObject_CallMethodId(
873 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000874 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200875 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000876
877 out = PyList_New(0);
878 if (!out)
879 return NULL;
880
881 if (!self->extra)
882 return out;
883
884 for (i = 0; i < self->extra->length; i++) {
885 PyObject* item = self->extra->children[i];
886 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000887 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000888 if (PyList_Append(out, item) < 0) {
889 Py_DECREF(out);
890 return NULL;
891 }
892 }
893 }
894
895 return out;
896}
897
898static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000899element_iterfind(ElementObject* self, PyObject* args)
900{
901 PyObject* tag;
902 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200903 _Py_IDENTIFIER(iterfind);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200904
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000905 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
906 return NULL;
907
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200908 return _PyObject_CallMethodId(
909 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000910 );
911}
912
913static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000914element_get(ElementObject* self, PyObject* args)
915{
916 PyObject* value;
917
918 PyObject* key;
919 PyObject* default_value = Py_None;
920 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
921 return NULL;
922
923 if (!self->extra || self->extra->attrib == Py_None)
924 value = default_value;
925 else {
926 value = PyDict_GetItem(self->extra->attrib, key);
927 if (!value)
928 value = default_value;
929 }
930
931 Py_INCREF(value);
932 return value;
933}
934
935static PyObject*
936element_getchildren(ElementObject* self, PyObject* args)
937{
938 int i;
939 PyObject* list;
940
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000941 /* FIXME: report as deprecated? */
942
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000943 if (!PyArg_ParseTuple(args, ":getchildren"))
944 return NULL;
945
946 if (!self->extra)
947 return PyList_New(0);
948
949 list = PyList_New(self->extra->length);
950 if (!list)
951 return NULL;
952
953 for (i = 0; i < self->extra->length; i++) {
954 PyObject* item = self->extra->children[i];
955 Py_INCREF(item);
956 PyList_SET_ITEM(list, i, item);
957 }
958
959 return list;
960}
961
962static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000963element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000964{
965 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100966
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000967 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000968 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000969 return NULL;
970
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000971 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000972 PyErr_SetString(
973 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000974 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000975 );
976 return NULL;
977 }
978
979 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000980 if (!args)
981 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000982
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000983 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
984 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
985
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000986 result = PyObject_CallObject(elementtree_iter_obj, args);
987
988 Py_DECREF(args);
989
990 return result;
991}
992
993
994static PyObject*
995element_itertext(ElementObject* self, PyObject* args)
996{
997 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100998
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000999 if (!PyArg_ParseTuple(args, ":itertext"))
1000 return NULL;
1001
1002 if (!elementtree_itertext_obj) {
1003 PyErr_SetString(
1004 PyExc_RuntimeError,
1005 "itertext helper not found"
1006 );
1007 return NULL;
1008 }
1009
1010 args = PyTuple_New(1);
1011 if (!args)
1012 return NULL;
1013
1014 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1015
1016 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001017
1018 Py_DECREF(args);
1019
1020 return result;
1021}
1022
1023static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001024element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001025{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001026 ElementObject* self = (ElementObject*) self_;
1027
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001028 if (!self->extra || index < 0 || index >= self->extra->length) {
1029 PyErr_SetString(
1030 PyExc_IndexError,
1031 "child index out of range"
1032 );
1033 return NULL;
1034 }
1035
1036 Py_INCREF(self->extra->children[index]);
1037 return self->extra->children[index];
1038}
1039
1040static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001041element_insert(ElementObject* self, PyObject* args)
1042{
1043 int i;
1044
1045 int index;
1046 PyObject* element;
1047 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1048 &Element_Type, &element))
1049 return NULL;
1050
1051 if (!self->extra)
1052 element_new_extra(self, NULL);
1053
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001054 if (index < 0) {
1055 index += self->extra->length;
1056 if (index < 0)
1057 index = 0;
1058 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001059 if (index > self->extra->length)
1060 index = self->extra->length;
1061
1062 if (element_resize(self, 1) < 0)
1063 return NULL;
1064
1065 for (i = self->extra->length; i > index; i--)
1066 self->extra->children[i] = self->extra->children[i-1];
1067
1068 Py_INCREF(element);
1069 self->extra->children[index] = element;
1070
1071 self->extra->length++;
1072
1073 Py_RETURN_NONE;
1074}
1075
1076static PyObject*
1077element_items(ElementObject* self, PyObject* args)
1078{
1079 if (!PyArg_ParseTuple(args, ":items"))
1080 return NULL;
1081
1082 if (!self->extra || self->extra->attrib == Py_None)
1083 return PyList_New(0);
1084
1085 return PyDict_Items(self->extra->attrib);
1086}
1087
1088static PyObject*
1089element_keys(ElementObject* self, PyObject* args)
1090{
1091 if (!PyArg_ParseTuple(args, ":keys"))
1092 return NULL;
1093
1094 if (!self->extra || self->extra->attrib == Py_None)
1095 return PyList_New(0);
1096
1097 return PyDict_Keys(self->extra->attrib);
1098}
1099
Martin v. Löwis18e16552006-02-15 17:27:45 +00001100static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001101element_length(ElementObject* self)
1102{
1103 if (!self->extra)
1104 return 0;
1105
1106 return self->extra->length;
1107}
1108
1109static PyObject*
1110element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1111{
1112 PyObject* elem;
1113
1114 PyObject* tag;
1115 PyObject* attrib;
1116 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1117 return NULL;
1118
1119 attrib = PyDict_Copy(attrib);
1120 if (!attrib)
1121 return NULL;
1122
1123 elem = element_new(tag, attrib);
1124
1125 Py_DECREF(attrib);
1126
1127 return elem;
1128}
1129
1130static PyObject*
1131element_reduce(ElementObject* self, PyObject* args)
1132{
1133 if (!PyArg_ParseTuple(args, ":__reduce__"))
1134 return NULL;
1135
1136 /* Hack alert: This method is used to work around a __copy__
1137 problem on certain 2.3 and 2.4 versions. To save time and
1138 simplify the code, we create the copy in here, and use a dummy
1139 copyelement helper to trick the copy module into doing the
1140 right thing. */
1141
1142 if (!elementtree_copyelement_obj) {
1143 PyErr_SetString(
1144 PyExc_RuntimeError,
1145 "copyelement helper not found"
1146 );
1147 return NULL;
1148 }
1149
1150 return Py_BuildValue(
1151 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1152 );
1153}
1154
1155static PyObject*
1156element_remove(ElementObject* self, PyObject* args)
1157{
1158 int i;
1159
1160 PyObject* element;
1161 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1162 return NULL;
1163
1164 if (!self->extra) {
1165 /* element has no children, so raise exception */
1166 PyErr_SetString(
1167 PyExc_ValueError,
1168 "list.remove(x): x not in list"
1169 );
1170 return NULL;
1171 }
1172
1173 for (i = 0; i < self->extra->length; i++) {
1174 if (self->extra->children[i] == element)
1175 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001176 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001177 break;
1178 }
1179
1180 if (i == self->extra->length) {
1181 /* element is not in children, so raise exception */
1182 PyErr_SetString(
1183 PyExc_ValueError,
1184 "list.remove(x): x not in list"
1185 );
1186 return NULL;
1187 }
1188
1189 Py_DECREF(self->extra->children[i]);
1190
1191 self->extra->length--;
1192
1193 for (; i < self->extra->length; i++)
1194 self->extra->children[i] = self->extra->children[i+1];
1195
1196 Py_RETURN_NONE;
1197}
1198
1199static PyObject*
1200element_repr(ElementObject* self)
1201{
Walter Dörwald7569dfe2007-05-19 21:49:49 +00001202 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001203}
1204
1205static PyObject*
1206element_set(ElementObject* self, PyObject* args)
1207{
1208 PyObject* attrib;
1209
1210 PyObject* key;
1211 PyObject* value;
1212 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1213 return NULL;
1214
1215 if (!self->extra)
1216 element_new_extra(self, NULL);
1217
1218 attrib = element_get_attrib(self);
1219 if (!attrib)
1220 return NULL;
1221
1222 if (PyDict_SetItem(attrib, key, value) < 0)
1223 return NULL;
1224
1225 Py_RETURN_NONE;
1226}
1227
1228static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001229element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001230{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001231 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001232 int i;
1233 PyObject* old;
1234
1235 if (!self->extra || index < 0 || index >= self->extra->length) {
1236 PyErr_SetString(
1237 PyExc_IndexError,
1238 "child assignment index out of range");
1239 return -1;
1240 }
1241
1242 old = self->extra->children[index];
1243
1244 if (item) {
1245 Py_INCREF(item);
1246 self->extra->children[index] = item;
1247 } else {
1248 self->extra->length--;
1249 for (i = index; i < self->extra->length; i++)
1250 self->extra->children[i] = self->extra->children[i+1];
1251 }
1252
1253 Py_DECREF(old);
1254
1255 return 0;
1256}
1257
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001258static PyObject*
1259element_subscr(PyObject* self_, PyObject* item)
1260{
1261 ElementObject* self = (ElementObject*) self_;
1262
1263#if (PY_VERSION_HEX < 0x02050000)
1264 if (PyInt_Check(item) || PyLong_Check(item)) {
1265 long i = PyInt_AsLong(item);
1266#else
1267 if (PyIndex_Check(item)) {
1268 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1269#endif
1270
1271 if (i == -1 && PyErr_Occurred()) {
1272 return NULL;
1273 }
1274 if (i < 0 && self->extra)
1275 i += self->extra->length;
1276 return element_getitem(self_, i);
1277 }
1278 else if (PySlice_Check(item)) {
1279 Py_ssize_t start, stop, step, slicelen, cur, i;
1280 PyObject* list;
1281
1282 if (!self->extra)
1283 return PyList_New(0);
1284
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001285 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001286 self->extra->length,
1287 &start, &stop, &step, &slicelen) < 0) {
1288 return NULL;
1289 }
1290
1291 if (slicelen <= 0)
1292 return PyList_New(0);
1293 else {
1294 list = PyList_New(slicelen);
1295 if (!list)
1296 return NULL;
1297
1298 for (cur = start, i = 0; i < slicelen;
1299 cur += step, i++) {
1300 PyObject* item = self->extra->children[cur];
1301 Py_INCREF(item);
1302 PyList_SET_ITEM(list, i, item);
1303 }
1304
1305 return list;
1306 }
1307 }
1308 else {
1309 PyErr_SetString(PyExc_TypeError,
1310 "element indices must be integers");
1311 return NULL;
1312 }
1313}
1314
1315static int
1316element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1317{
1318 ElementObject* self = (ElementObject*) self_;
1319
1320#if (PY_VERSION_HEX < 0x02050000)
1321 if (PyInt_Check(item) || PyLong_Check(item)) {
1322 long i = PyInt_AsLong(item);
1323#else
1324 if (PyIndex_Check(item)) {
1325 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1326#endif
1327
1328 if (i == -1 && PyErr_Occurred()) {
1329 return -1;
1330 }
1331 if (i < 0 && self->extra)
1332 i += self->extra->length;
1333 return element_setitem(self_, i, value);
1334 }
1335 else if (PySlice_Check(item)) {
1336 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1337
1338 PyObject* recycle = NULL;
1339 PyObject* seq = NULL;
1340
1341 if (!self->extra)
1342 element_new_extra(self, NULL);
1343
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001344 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001345 self->extra->length,
1346 &start, &stop, &step, &slicelen) < 0) {
1347 return -1;
1348 }
1349
1350 if (value == NULL)
1351 newlen = 0;
1352 else {
1353 seq = PySequence_Fast(value, "");
1354 if (!seq) {
1355 PyErr_Format(
1356 PyExc_TypeError,
1357 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1358 );
1359 return -1;
1360 }
1361 newlen = PySequence_Size(seq);
1362 }
1363
1364 if (step != 1 && newlen != slicelen)
1365 {
1366 PyErr_Format(PyExc_ValueError,
1367#if (PY_VERSION_HEX < 0x02050000)
1368 "attempt to assign sequence of size %d "
1369 "to extended slice of size %d",
1370#else
1371 "attempt to assign sequence of size %zd "
1372 "to extended slice of size %zd",
1373#endif
1374 newlen, slicelen
1375 );
1376 return -1;
1377 }
1378
1379
1380 /* Resize before creating the recycle bin, to prevent refleaks. */
1381 if (newlen > slicelen) {
1382 if (element_resize(self, newlen - slicelen) < 0) {
1383 if (seq) {
1384 Py_DECREF(seq);
1385 }
1386 return -1;
1387 }
1388 }
1389
1390 if (slicelen > 0) {
1391 /* to avoid recursive calls to this method (via decref), move
1392 old items to the recycle bin here, and get rid of them when
1393 we're done modifying the element */
1394 recycle = PyList_New(slicelen);
1395 if (!recycle) {
1396 if (seq) {
1397 Py_DECREF(seq);
1398 }
1399 return -1;
1400 }
1401 for (cur = start, i = 0; i < slicelen;
1402 cur += step, i++)
1403 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1404 }
1405
1406 if (newlen < slicelen) {
1407 /* delete slice */
1408 for (i = stop; i < self->extra->length; i++)
1409 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1410 } else if (newlen > slicelen) {
1411 /* insert slice */
1412 for (i = self->extra->length-1; i >= stop; i--)
1413 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1414 }
1415
1416 /* replace the slice */
1417 for (cur = start, i = 0; i < newlen;
1418 cur += step, i++) {
1419 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1420 Py_INCREF(element);
1421 self->extra->children[cur] = element;
1422 }
1423
1424 self->extra->length += newlen - slicelen;
1425
1426 if (seq) {
1427 Py_DECREF(seq);
1428 }
1429
1430 /* discard the recycle bin, and everything in it */
1431 Py_XDECREF(recycle);
1432
1433 return 0;
1434 }
1435 else {
1436 PyErr_SetString(PyExc_TypeError,
1437 "element indices must be integers");
1438 return -1;
1439 }
1440}
1441
/* Method table for Element objects (referenced from Element_Type's
   tp_methods slot).  Every entry uses the METH_VARARGS convention. */
static PyMethodDef element_methods[] = {

    {"clear", (PyCFunction) element_clear, METH_VARARGS},

    /* attribute access */
    {"get", (PyCFunction) element_get, METH_VARARGS},
    {"set", (PyCFunction) element_set, METH_VARARGS},

    /* searching */
    {"find", (PyCFunction) element_find, METH_VARARGS},
    {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
    {"findall", (PyCFunction) element_findall, METH_VARARGS},

    /* child list manipulation */
    {"append", (PyCFunction) element_append, METH_VARARGS},
    {"extend", (PyCFunction) element_extend, METH_VARARGS},
    {"insert", (PyCFunction) element_insert, METH_VARARGS},
    {"remove", (PyCFunction) element_remove, METH_VARARGS},

    /* iteration */
    {"iter", (PyCFunction) element_iter, METH_VARARGS},
    {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
    {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},

    /* "getiterator" is bound to the same C function as "iter" */
    {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
    {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},

    {"items", (PyCFunction) element_items, METH_VARARGS},
    {"keys", (PyCFunction) element_keys, METH_VARARGS},

    {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},

    {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
    {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},

    /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
       C objects correctly, so we have to fake it using a __reduce__-
       based hack (see the element_reduce implementation above for
       details). */

    /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
       using a runtime test to figure out if we need to fake things
       or not (see the init code below).  The following entry is
       enabled only if the hack is needed. */

    {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},

    /* sentinel */
    {NULL, NULL}
};
1487
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001488static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001489element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001490{
1491 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001492 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001493
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001494 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001495 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001496
Alexander Belopolskye239d232010-12-08 23:31:48 +00001497 if (name == NULL)
1498 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001499
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001500 /* handle common attributes first */
1501 if (strcmp(name, "tag") == 0) {
1502 res = self->tag;
1503 Py_INCREF(res);
1504 return res;
1505 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001506 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001507 Py_INCREF(res);
1508 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001509 }
1510
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001511 /* methods */
1512 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1513 if (res)
1514 return res;
1515
1516 /* less common attributes */
1517 if (strcmp(name, "tail") == 0) {
1518 PyErr_Clear();
1519 res = element_get_tail(self);
1520 } else if (strcmp(name, "attrib") == 0) {
1521 PyErr_Clear();
1522 if (!self->extra)
1523 element_new_extra(self, NULL);
1524 res = element_get_attrib(self);
1525 }
1526
1527 if (!res)
1528 return NULL;
1529
1530 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001531 return res;
1532}
1533
1534static int
1535element_setattr(ElementObject* self, const char* name, PyObject* value)
1536{
1537 if (value == NULL) {
1538 PyErr_SetString(
1539 PyExc_AttributeError,
1540 "can't delete element attributes"
1541 );
1542 return -1;
1543 }
1544
1545 if (strcmp(name, "tag") == 0) {
1546 Py_DECREF(self->tag);
1547 self->tag = value;
1548 Py_INCREF(self->tag);
1549 } else if (strcmp(name, "text") == 0) {
1550 Py_DECREF(JOIN_OBJ(self->text));
1551 self->text = value;
1552 Py_INCREF(self->text);
1553 } else if (strcmp(name, "tail") == 0) {
1554 Py_DECREF(JOIN_OBJ(self->tail));
1555 self->tail = value;
1556 Py_INCREF(self->tail);
1557 } else if (strcmp(name, "attrib") == 0) {
1558 if (!self->extra)
1559 element_new_extra(self, NULL);
1560 Py_DECREF(self->extra->attrib);
1561 self->extra->attrib = value;
1562 Py_INCREF(self->extra->attrib);
1563 } else {
1564 PyErr_SetString(PyExc_AttributeError, name);
1565 return -1;
1566 }
1567
1568 return 0;
1569}
1570
/* Sequence protocol for Element objects: length, integer indexing and
   integer item assignment/deletion.  Slices are handled by the mapping
   protocol (element_as_mapping), so the two slice slots stay empty. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length,
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem, /* sq_item */
    0, /* slice slot, unused */
    element_setitem, /* sq_ass_item */
    0, /* slice assignment slot, unused */
};
1580
/* Mapping protocol for Element objects; this is what accepts both
   integer and slice subscripts. */
static PyMappingMethods element_as_mapping = {
    (lenfunc) element_length, /* mp_length */
    (binaryfunc) element_subscr, /* mp_subscript */
    (objobjargproc) element_ass_subscr, /* mp_ass_subscript */
};
1586
/* Type object for Element.  Reads go through tp_getattro
   (element_getattro) and writes through the name-based tp_setattr
   (element_setattr); children are exposed via both the sequence and
   mapping protocols. */
static PyTypeObject Element_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* tp_name, tp_basicsize, tp_itemsize */
    "Element", sizeof(ElementObject), 0,
    /* methods */
    (destructor)element_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    (setattrfunc)element_setattr, /* tp_setattr */
    0, /* tp_reserved */
    (reprfunc)element_repr, /* tp_repr */
    0, /* tp_as_number */
    &element_as_sequence, /* tp_as_sequence */
    &element_as_mapping, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    (getattrofunc)element_getattro, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT, /* tp_flags */
    0, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    element_methods, /* tp_methods */
    0, /* tp_members */
};
1617
1618/* ==================================================================== */
1619/* the tree builder type */
1620
/* Instance layout of the tree builder: it assembles an element tree
   from start/data/end calls and can optionally record events. */
typedef struct {
    PyObject_HEAD

    PyObject* root; /* root node (first created node) */

    /* both start out as Py_None (see treebuilder_new) */
    ElementObject* this; /* current node */
    ElementObject* last; /* most recently created node */

    PyObject* data; /* data collector (string or list), or NULL */

    PyObject* stack; /* element stack */
    Py_ssize_t index; /* current stack size (0=empty) */

    /* element tracing */
    PyObject* events; /* list of events, or NULL if not collecting */
    PyObject* start_event_obj; /* event objects (NULL to ignore) */
    PyObject* end_event_obj;
    PyObject* start_ns_event_obj;
    PyObject* end_ns_event_obj;

} TreeBuilderObject;
1642
/* forward declaration; the type object is defined after its methods */
static PyTypeObject TreeBuilder_Type;

/* true only for objects whose type is exactly TreeBuilder_Type */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001646
1647/* -------------------------------------------------------------------- */
1648/* constructor and destructor */
1649
1650LOCAL(PyObject*)
1651treebuilder_new(void)
1652{
1653 TreeBuilderObject* self;
1654
1655 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1656 if (self == NULL)
1657 return NULL;
1658
1659 self->root = NULL;
1660
1661 Py_INCREF(Py_None);
1662 self->this = (ElementObject*) Py_None;
1663
1664 Py_INCREF(Py_None);
1665 self->last = (ElementObject*) Py_None;
1666
1667 self->data = NULL;
1668
1669 self->stack = PyList_New(20);
1670 self->index = 0;
1671
1672 self->events = NULL;
1673 self->start_event_obj = self->end_event_obj = NULL;
1674 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1675
1676 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1677
1678 return (PyObject*) self;
1679}
1680
1681static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001682treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001683{
1684 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1685 return NULL;
1686
1687 return treebuilder_new();
1688}
1689
1690static void
1691treebuilder_dealloc(TreeBuilderObject* self)
1692{
1693 Py_XDECREF(self->end_ns_event_obj);
1694 Py_XDECREF(self->start_ns_event_obj);
1695 Py_XDECREF(self->end_event_obj);
1696 Py_XDECREF(self->start_event_obj);
1697 Py_XDECREF(self->events);
1698 Py_DECREF(self->stack);
1699 Py_XDECREF(self->data);
1700 Py_DECREF(self->last);
1701 Py_DECREF(self->this);
1702 Py_XDECREF(self->root);
1703
1704 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1705
1706 PyObject_Del(self);
1707}
1708
1709/* -------------------------------------------------------------------- */
1710/* handlers */
1711
1712LOCAL(PyObject*)
1713treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1714 PyObject* standalone)
1715{
1716 Py_RETURN_NONE;
1717}
1718
1719LOCAL(PyObject*)
1720treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1721 PyObject* attrib)
1722{
1723 PyObject* node;
1724 PyObject* this;
1725
1726 if (self->data) {
1727 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001728 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001729 self->last->text = JOIN_SET(
1730 self->data, PyList_CheckExact(self->data)
1731 );
1732 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001733 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001734 self->last->tail = JOIN_SET(
1735 self->data, PyList_CheckExact(self->data)
1736 );
1737 }
1738 self->data = NULL;
1739 }
1740
1741 node = element_new(tag, attrib);
1742 if (!node)
1743 return NULL;
1744
1745 this = (PyObject*) self->this;
1746
1747 if (this != Py_None) {
1748 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001749 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001750 } else {
1751 if (self->root) {
1752 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001753 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001754 "multiple elements on top level"
1755 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001756 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001757 }
1758 Py_INCREF(node);
1759 self->root = node;
1760 }
1761
1762 if (self->index < PyList_GET_SIZE(self->stack)) {
1763 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001764 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001765 Py_INCREF(this);
1766 } else {
1767 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001768 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001769 }
1770 self->index++;
1771
1772 Py_DECREF(this);
1773 Py_INCREF(node);
1774 self->this = (ElementObject*) node;
1775
1776 Py_DECREF(self->last);
1777 Py_INCREF(node);
1778 self->last = (ElementObject*) node;
1779
1780 if (self->start_event_obj) {
1781 PyObject* res;
1782 PyObject* action = self->start_event_obj;
1783 res = PyTuple_New(2);
1784 if (res) {
1785 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1786 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1787 PyList_Append(self->events, res);
1788 Py_DECREF(res);
1789 } else
1790 PyErr_Clear(); /* FIXME: propagate error */
1791 }
1792
1793 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001794
1795 error:
1796 Py_DECREF(node);
1797 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001798}
1799
1800LOCAL(PyObject*)
1801treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1802{
1803 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001804 if (self->last == (ElementObject*) Py_None) {
1805 /* ignore calls to data before the first call to start */
1806 Py_RETURN_NONE;
1807 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001808 /* store the first item as is */
1809 Py_INCREF(data); self->data = data;
1810 } else {
1811 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001812 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1813 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001814 /* expat often generates single character data sections; handle
1815 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001816 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1817 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001818 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001819 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001820 } else if (PyList_CheckExact(self->data)) {
1821 if (PyList_Append(self->data, data) < 0)
1822 return NULL;
1823 } else {
1824 PyObject* list = PyList_New(2);
1825 if (!list)
1826 return NULL;
1827 PyList_SET_ITEM(list, 0, self->data);
1828 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1829 self->data = list;
1830 }
1831 }
1832
1833 Py_RETURN_NONE;
1834}
1835
1836LOCAL(PyObject*)
1837treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1838{
1839 PyObject* item;
1840
1841 if (self->data) {
1842 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001843 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001844 self->last->text = JOIN_SET(
1845 self->data, PyList_CheckExact(self->data)
1846 );
1847 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001848 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001849 self->last->tail = JOIN_SET(
1850 self->data, PyList_CheckExact(self->data)
1851 );
1852 }
1853 self->data = NULL;
1854 }
1855
1856 if (self->index == 0) {
1857 PyErr_SetString(
1858 PyExc_IndexError,
1859 "pop from empty stack"
1860 );
1861 return NULL;
1862 }
1863
1864 self->index--;
1865
1866 item = PyList_GET_ITEM(self->stack, self->index);
1867 Py_INCREF(item);
1868
1869 Py_DECREF(self->last);
1870
1871 self->last = (ElementObject*) self->this;
1872 self->this = (ElementObject*) item;
1873
1874 if (self->end_event_obj) {
1875 PyObject* res;
1876 PyObject* action = self->end_event_obj;
1877 PyObject* node = (PyObject*) self->last;
1878 res = PyTuple_New(2);
1879 if (res) {
1880 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1881 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1882 PyList_Append(self->events, res);
1883 Py_DECREF(res);
1884 } else
1885 PyErr_Clear(); /* FIXME: propagate error */
1886 }
1887
1888 Py_INCREF(self->last);
1889 return (PyObject*) self->last;
1890}
1891
1892LOCAL(void)
1893treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001894 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001895{
1896 PyObject* res;
1897 PyObject* action;
1898 PyObject* parcel;
1899
1900 if (!self->events)
1901 return;
1902
1903 if (start) {
1904 if (!self->start_ns_event_obj)
1905 return;
1906 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001907 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001908 if (!parcel)
1909 return;
1910 Py_INCREF(action);
1911 } else {
1912 if (!self->end_ns_event_obj)
1913 return;
1914 action = self->end_ns_event_obj;
1915 Py_INCREF(action);
1916 parcel = Py_None;
1917 Py_INCREF(parcel);
1918 }
1919
1920 res = PyTuple_New(2);
1921
1922 if (res) {
1923 PyTuple_SET_ITEM(res, 0, action);
1924 PyTuple_SET_ITEM(res, 1, parcel);
1925 PyList_Append(self->events, res);
1926 Py_DECREF(res);
1927 } else
1928 PyErr_Clear(); /* FIXME: propagate error */
1929}
1930
1931/* -------------------------------------------------------------------- */
1932/* methods (in alphabetical order) */
1933
1934static PyObject*
1935treebuilder_data(TreeBuilderObject* self, PyObject* args)
1936{
1937 PyObject* data;
1938 if (!PyArg_ParseTuple(args, "O:data", &data))
1939 return NULL;
1940
1941 return treebuilder_handle_data(self, data);
1942}
1943
1944static PyObject*
1945treebuilder_end(TreeBuilderObject* self, PyObject* args)
1946{
1947 PyObject* tag;
1948 if (!PyArg_ParseTuple(args, "O:end", &tag))
1949 return NULL;
1950
1951 return treebuilder_handle_end(self, tag);
1952}
1953
1954LOCAL(PyObject*)
1955treebuilder_done(TreeBuilderObject* self)
1956{
1957 PyObject* res;
1958
1959 /* FIXME: check stack size? */
1960
1961 if (self->root)
1962 res = self->root;
1963 else
1964 res = Py_None;
1965
1966 Py_INCREF(res);
1967 return res;
1968}
1969
1970static PyObject*
1971treebuilder_close(TreeBuilderObject* self, PyObject* args)
1972{
1973 if (!PyArg_ParseTuple(args, ":close"))
1974 return NULL;
1975
1976 return treebuilder_done(self);
1977}
1978
1979static PyObject*
1980treebuilder_start(TreeBuilderObject* self, PyObject* args)
1981{
1982 PyObject* tag;
1983 PyObject* attrib = Py_None;
1984 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1985 return NULL;
1986
1987 return treebuilder_handle_start(self, tag, attrib);
1988}
1989
1990static PyObject*
1991treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1992{
1993 PyObject* encoding;
1994 PyObject* standalone;
1995 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1996 return NULL;
1997
1998 return treebuilder_handle_xml(self, encoding, standalone);
1999}
2000
/* Method table for TreeBuilder objects; each entry is a thin
   METH_VARARGS wrapper that parses its arguments and forwards to the
   matching treebuilder_handle_xxx / treebuilder_done function. */
static PyMethodDef treebuilder_methods[] = {
    {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
    {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
    {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
    {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
    {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
    /* sentinel */
    {NULL, NULL}
};
2009
/* Type object for TreeBuilder.  Only the destructor and the method
   table are provided; every other slot is left empty. */
static PyTypeObject TreeBuilder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* tp_name, tp_basicsize, tp_itemsize */
    "TreeBuilder", sizeof(TreeBuilderObject), 0,
    /* methods */
    (destructor)treebuilder_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_reserved */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT, /* tp_flags */
    0, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    treebuilder_methods, /* tp_methods */
    0, /* tp_members */
};
2040
2041/* ==================================================================== */
2042/* the expat interface */
2043
2044#if defined(USE_EXPAT)
2045
2046#include "expat.h"
2047
/* EXPAT(func) names an expat entry point.  With USE_PYEXPAT_CAPI the
   call goes through the expat_capi dispatch table (xmlparser() refuses
   to run while that pointer is NULL); otherwise it expands to the
   XML_-prefixed function directly. */
#if defined(USE_PYEXPAT_CAPI)
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
2055
/* Per-instance state of an XMLParser object. */
typedef struct {
    PyObject_HEAD

    XML_Parser parser; /* expat parser, created in xmlparser() */

    PyObject* target; /* object that receives the parse events */
    PyObject* entity; /* dict consulted by expat_default_handler */

    PyObject* names; /* dict used by makeuniversal: raw name -> decoded name */

    /* The handle_* members hold the attribute of the same name looked up
       on the target in xmlparser(); a member is NULL when the target
       does not provide that attribute. */
    PyObject* handle_xml;

    PyObject* handle_start;
    PyObject* handle_data;
    PyObject* handle_end;

    PyObject* handle_comment;
    PyObject* handle_pi;

    PyObject* handle_close;

} XMLParserObject;
2078
/* forward declaration; the initializer follows the parser methods */
static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002080
2081/* helpers */
2082
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002083LOCAL(PyObject*)
2084makeuniversal(XMLParserObject* self, const char* string)
2085{
2086 /* convert a UTF-8 tag/attribute name from the expat parser
2087 to a universal name string */
2088
2089 int size = strlen(string);
2090 PyObject* key;
2091 PyObject* value;
2092
2093 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002094 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002095 if (!key)
2096 return NULL;
2097
2098 value = PyDict_GetItem(self->names, key);
2099
2100 if (value) {
2101 Py_INCREF(value);
2102 } else {
2103 /* new name. convert to universal name, and decode as
2104 necessary */
2105
2106 PyObject* tag;
2107 char* p;
2108 int i;
2109
2110 /* look for namespace separator */
2111 for (i = 0; i < size; i++)
2112 if (string[i] == '}')
2113 break;
2114 if (i != size) {
2115 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002116 tag = PyBytes_FromStringAndSize(NULL, size+1);
2117 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002118 p[0] = '{';
2119 memcpy(p+1, string, size);
2120 size++;
2121 } else {
2122 /* plain name; use key as tag */
2123 Py_INCREF(key);
2124 tag = key;
2125 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002126
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002127 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002128 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002129 value = PyUnicode_DecodeUTF8(p, size, "strict");
2130 Py_DECREF(tag);
2131 if (!value) {
2132 Py_DECREF(key);
2133 return NULL;
2134 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002135
2136 /* add to names dictionary */
2137 if (PyDict_SetItem(self->names, key, value) < 0) {
2138 Py_DECREF(key);
2139 Py_DECREF(value);
2140 return NULL;
2141 }
2142 }
2143
2144 Py_DECREF(key);
2145 return value;
2146}
2147
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002148static void
2149expat_set_error(const char* message, int line, int column)
2150{
Victor Stinner499dfcf2011-03-21 13:26:24 +01002151 PyObject *errmsg, *error, *position;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002152
Victor Stinner499dfcf2011-03-21 13:26:24 +01002153 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
2154 message, line, column);
2155 if (errmsg == NULL)
2156 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002157
Victor Stinner499dfcf2011-03-21 13:26:24 +01002158 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2159 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002160 if (!error)
2161 return;
2162
2163 /* add position attribute */
2164 position = Py_BuildValue("(ii)", line, column);
2165 if (!position) {
2166 Py_DECREF(error);
2167 return;
2168 }
2169 if (PyObject_SetAttrString(error, "position", position) == -1) {
2170 Py_DECREF(error);
2171 Py_DECREF(position);
2172 return;
2173 }
2174 Py_DECREF(position);
2175
2176 PyErr_SetObject(elementtree_parseerror_obj, error);
2177 Py_DECREF(error);
2178}
2179
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002180/* -------------------------------------------------------------------- */
2181/* handlers */
2182
2183static void
2184expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2185 int data_len)
2186{
2187 PyObject* key;
2188 PyObject* value;
2189 PyObject* res;
2190
2191 if (data_len < 2 || data_in[0] != '&')
2192 return;
2193
Neal Norwitz0269b912007-08-08 06:56:02 +00002194 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002195 if (!key)
2196 return;
2197
2198 value = PyDict_GetItem(self->entity, key);
2199
2200 if (value) {
2201 if (TreeBuilder_CheckExact(self->target))
2202 res = treebuilder_handle_data(
2203 (TreeBuilderObject*) self->target, value
2204 );
2205 else if (self->handle_data)
2206 res = PyObject_CallFunction(self->handle_data, "O", value);
2207 else
2208 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002209 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002210 } else if (!PyErr_Occurred()) {
2211 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002212 char message[128] = "undefined entity ";
2213 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002214 expat_set_error(
2215 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002216 EXPAT(GetErrorLineNumber)(self->parser),
2217 EXPAT(GetErrorColumnNumber)(self->parser)
2218 );
2219 }
2220
2221 Py_DECREF(key);
2222}
2223
2224static void
2225expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2226 const XML_Char **attrib_in)
2227{
2228 PyObject* res;
2229 PyObject* tag;
2230 PyObject* attrib;
2231 int ok;
2232
2233 /* tag name */
2234 tag = makeuniversal(self, tag_in);
2235 if (!tag)
2236 return; /* parser will look for errors */
2237
2238 /* attributes */
2239 if (attrib_in[0]) {
2240 attrib = PyDict_New();
2241 if (!attrib)
2242 return;
2243 while (attrib_in[0] && attrib_in[1]) {
2244 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002245 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002246 if (!key || !value) {
2247 Py_XDECREF(value);
2248 Py_XDECREF(key);
2249 Py_DECREF(attrib);
2250 return;
2251 }
2252 ok = PyDict_SetItem(attrib, key, value);
2253 Py_DECREF(value);
2254 Py_DECREF(key);
2255 if (ok < 0) {
2256 Py_DECREF(attrib);
2257 return;
2258 }
2259 attrib_in += 2;
2260 }
2261 } else {
2262 Py_INCREF(Py_None);
2263 attrib = Py_None;
2264 }
2265
2266 if (TreeBuilder_CheckExact(self->target))
2267 /* shortcut */
2268 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2269 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002270 else if (self->handle_start) {
2271 if (attrib == Py_None) {
2272 Py_DECREF(attrib);
2273 attrib = PyDict_New();
2274 if (!attrib)
2275 return;
2276 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002277 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002278 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002279 res = NULL;
2280
2281 Py_DECREF(tag);
2282 Py_DECREF(attrib);
2283
2284 Py_XDECREF(res);
2285}
2286
2287static void
2288expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2289 int data_len)
2290{
2291 PyObject* data;
2292 PyObject* res;
2293
Neal Norwitz0269b912007-08-08 06:56:02 +00002294 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002295 if (!data)
2296 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002297
2298 if (TreeBuilder_CheckExact(self->target))
2299 /* shortcut */
2300 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2301 else if (self->handle_data)
2302 res = PyObject_CallFunction(self->handle_data, "O", data);
2303 else
2304 res = NULL;
2305
2306 Py_DECREF(data);
2307
2308 Py_XDECREF(res);
2309}
2310
2311static void
2312expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2313{
2314 PyObject* tag;
2315 PyObject* res = NULL;
2316
2317 if (TreeBuilder_CheckExact(self->target))
2318 /* shortcut */
2319 /* the standard tree builder doesn't look at the end tag */
2320 res = treebuilder_handle_end(
2321 (TreeBuilderObject*) self->target, Py_None
2322 );
2323 else if (self->handle_end) {
2324 tag = makeuniversal(self, tag_in);
2325 if (tag) {
2326 res = PyObject_CallFunction(self->handle_end, "O", tag);
2327 Py_DECREF(tag);
2328 }
2329 }
2330
2331 Py_XDECREF(res);
2332}
2333
2334static void
2335expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2336 const XML_Char *uri)
2337{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002338 PyObject* sprefix = NULL;
2339 PyObject* suri = NULL;
2340
2341 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2342 if (!suri)
2343 return;
2344
2345 if (prefix)
2346 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2347 else
2348 sprefix = PyUnicode_FromString("");
2349 if (!sprefix) {
2350 Py_DECREF(suri);
2351 return;
2352 }
2353
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002354 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002355 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002356 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002357
2358 Py_DECREF(sprefix);
2359 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002360}
2361
2362static void
2363expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2364{
2365 treebuilder_handle_namespace(
2366 (TreeBuilderObject*) self->target, 0, NULL, NULL
2367 );
2368}
2369
2370static void
2371expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2372{
2373 PyObject* comment;
2374 PyObject* res;
2375
2376 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002377 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002378 if (comment) {
2379 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2380 Py_XDECREF(res);
2381 Py_DECREF(comment);
2382 }
2383 }
2384}
2385
2386static void
2387expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2388 const XML_Char* data_in)
2389{
2390 PyObject* target;
2391 PyObject* data;
2392 PyObject* res;
2393
2394 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002395 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2396 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002397 if (target && data) {
2398 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2399 Py_XDECREF(res);
2400 Py_DECREF(data);
2401 Py_DECREF(target);
2402 } else {
2403 Py_XDECREF(data);
2404 Py_XDECREF(target);
2405 }
2406 }
2407}
2408
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002409static int
2410expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2411 XML_Encoding *info)
2412{
2413 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002414 unsigned char s[256];
2415 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002416 void *data;
2417 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002418
2419 memset(info, 0, sizeof(XML_Encoding));
2420
2421 for (i = 0; i < 256; i++)
2422 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002423
Fredrik Lundhc3389992005-12-25 11:40:19 +00002424 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002425 if (!u)
2426 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002427 if (PyUnicode_READY(u))
2428 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002429
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002430 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002431 Py_DECREF(u);
2432 return XML_STATUS_ERROR;
2433 }
2434
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002435 kind = PyUnicode_KIND(u);
2436 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002437 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002438 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2439 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2440 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002441 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002442 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002443 }
2444
2445 Py_DECREF(u);
2446
2447 return XML_STATUS_OK;
2448}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002449
2450/* -------------------------------------------------------------------- */
2451/* constructor and destructor */
2452
2453static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002454xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002455{
2456 XMLParserObject* self;
2457 /* FIXME: does this need to be static? */
2458 static XML_Memory_Handling_Suite memory_handler;
2459
2460 PyObject* target = NULL;
2461 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002462 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002463 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2464 &target, &encoding))
2465 return NULL;
2466
2467#if defined(USE_PYEXPAT_CAPI)
2468 if (!expat_capi) {
2469 PyErr_SetString(
2470 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2471 );
2472 return NULL;
2473 }
2474#endif
2475
2476 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2477 if (self == NULL)
2478 return NULL;
2479
2480 self->entity = PyDict_New();
2481 if (!self->entity) {
2482 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002483 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002484 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002485
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002486 self->names = PyDict_New();
2487 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002488 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002489 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002490 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002491 }
2492
2493 memory_handler.malloc_fcn = PyObject_Malloc;
2494 memory_handler.realloc_fcn = PyObject_Realloc;
2495 memory_handler.free_fcn = PyObject_Free;
2496
2497 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2498 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002499 PyObject_Del(self->names);
2500 PyObject_Del(self->entity);
2501 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002502 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002503 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002504 }
2505
2506 /* setup target handlers */
2507 if (!target) {
2508 target = treebuilder_new();
2509 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002510 EXPAT(ParserFree)(self->parser);
2511 PyObject_Del(self->names);
2512 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002513 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002514 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002515 }
2516 } else
2517 Py_INCREF(target);
2518 self->target = target;
2519
2520 self->handle_xml = PyObject_GetAttrString(target, "xml");
2521 self->handle_start = PyObject_GetAttrString(target, "start");
2522 self->handle_data = PyObject_GetAttrString(target, "data");
2523 self->handle_end = PyObject_GetAttrString(target, "end");
2524 self->handle_comment = PyObject_GetAttrString(target, "comment");
2525 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002526 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002527
2528 PyErr_Clear();
2529
2530 /* configure parser */
2531 EXPAT(SetUserData)(self->parser, self);
2532 EXPAT(SetElementHandler)(
2533 self->parser,
2534 (XML_StartElementHandler) expat_start_handler,
2535 (XML_EndElementHandler) expat_end_handler
2536 );
2537 EXPAT(SetDefaultHandlerExpand)(
2538 self->parser,
2539 (XML_DefaultHandler) expat_default_handler
2540 );
2541 EXPAT(SetCharacterDataHandler)(
2542 self->parser,
2543 (XML_CharacterDataHandler) expat_data_handler
2544 );
2545 if (self->handle_comment)
2546 EXPAT(SetCommentHandler)(
2547 self->parser,
2548 (XML_CommentHandler) expat_comment_handler
2549 );
2550 if (self->handle_pi)
2551 EXPAT(SetProcessingInstructionHandler)(
2552 self->parser,
2553 (XML_ProcessingInstructionHandler) expat_pi_handler
2554 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002555 EXPAT(SetUnknownEncodingHandler)(
2556 self->parser,
2557 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2558 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002559
2560 ALLOC(sizeof(XMLParserObject), "create expatparser");
2561
2562 return (PyObject*) self;
2563}
2564
2565static void
2566xmlparser_dealloc(XMLParserObject* self)
2567{
2568 EXPAT(ParserFree)(self->parser);
2569
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002570 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002571 Py_XDECREF(self->handle_pi);
2572 Py_XDECREF(self->handle_comment);
2573 Py_XDECREF(self->handle_end);
2574 Py_XDECREF(self->handle_data);
2575 Py_XDECREF(self->handle_start);
2576 Py_XDECREF(self->handle_xml);
2577
2578 Py_DECREF(self->target);
2579 Py_DECREF(self->entity);
2580 Py_DECREF(self->names);
2581
2582 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2583
2584 PyObject_Del(self);
2585}
2586
2587/* -------------------------------------------------------------------- */
2588/* methods (in alphabetical order) */
2589
2590LOCAL(PyObject*)
2591expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2592{
2593 int ok;
2594
2595 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2596
2597 if (PyErr_Occurred())
2598 return NULL;
2599
2600 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002601 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002602 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2603 EXPAT(GetErrorLineNumber)(self->parser),
2604 EXPAT(GetErrorColumnNumber)(self->parser)
2605 );
2606 return NULL;
2607 }
2608
2609 Py_RETURN_NONE;
2610}
2611
2612static PyObject*
2613xmlparser_close(XMLParserObject* self, PyObject* args)
2614{
2615 /* end feeding data to parser */
2616
2617 PyObject* res;
2618 if (!PyArg_ParseTuple(args, ":close"))
2619 return NULL;
2620
2621 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002622 if (!res)
2623 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002624
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002625 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002626 Py_DECREF(res);
2627 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002628 } if (self->handle_close) {
2629 Py_DECREF(res);
2630 return PyObject_CallFunction(self->handle_close, "");
2631 } else
2632 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002633}
2634
2635static PyObject*
2636xmlparser_feed(XMLParserObject* self, PyObject* args)
2637{
2638 /* feed data to parser */
2639
2640 char* data;
2641 int data_len;
2642 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2643 return NULL;
2644
2645 return expat_parse(self, data, data_len, 0);
2646}
2647
2648static PyObject*
2649xmlparser_parse(XMLParserObject* self, PyObject* args)
2650{
2651 /* (internal) parse until end of input stream */
2652
2653 PyObject* reader;
2654 PyObject* buffer;
2655 PyObject* res;
2656
2657 PyObject* fileobj;
2658 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2659 return NULL;
2660
2661 reader = PyObject_GetAttrString(fileobj, "read");
2662 if (!reader)
2663 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002664
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002665 /* read from open file object */
2666 for (;;) {
2667
2668 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2669
2670 if (!buffer) {
2671 /* read failed (e.g. due to KeyboardInterrupt) */
2672 Py_DECREF(reader);
2673 return NULL;
2674 }
2675
Christian Heimes72b710a2008-05-26 13:28:38 +00002676 if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002677 Py_DECREF(buffer);
2678 break;
2679 }
2680
2681 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002682 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002683 );
2684
2685 Py_DECREF(buffer);
2686
2687 if (!res) {
2688 Py_DECREF(reader);
2689 return NULL;
2690 }
2691 Py_DECREF(res);
2692
2693 }
2694
2695 Py_DECREF(reader);
2696
2697 res = expat_parse(self, "", 0, 1);
2698
2699 if (res && TreeBuilder_CheckExact(self->target)) {
2700 Py_DECREF(res);
2701 return treebuilder_done((TreeBuilderObject*) self->target);
2702 }
2703
2704 return res;
2705}
2706
2707static PyObject*
2708xmlparser_setevents(XMLParserObject* self, PyObject* args)
2709{
2710 /* activate element event reporting */
2711
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002712 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002713 TreeBuilderObject* target;
2714
2715 PyObject* events; /* event collector */
2716 PyObject* event_set = Py_None;
2717 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2718 &event_set))
2719 return NULL;
2720
2721 if (!TreeBuilder_CheckExact(self->target)) {
2722 PyErr_SetString(
2723 PyExc_TypeError,
2724 "event handling only supported for cElementTree.Treebuilder "
2725 "targets"
2726 );
2727 return NULL;
2728 }
2729
2730 target = (TreeBuilderObject*) self->target;
2731
2732 Py_INCREF(events);
2733 Py_XDECREF(target->events);
2734 target->events = events;
2735
2736 /* clear out existing events */
2737 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2738 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2739 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2740 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2741
2742 if (event_set == Py_None) {
2743 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002744 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002745 Py_RETURN_NONE;
2746 }
2747
2748 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2749 goto error;
2750
2751 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2752 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2753 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002754 if (PyUnicode_Check(item)) {
2755 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002756 if (event == NULL)
2757 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002758 } else if (PyBytes_Check(item))
2759 event = PyBytes_AS_STRING(item);
2760 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002761 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002762 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002763 if (strcmp(event, "start") == 0) {
2764 Py_INCREF(item);
2765 target->start_event_obj = item;
2766 } else if (strcmp(event, "end") == 0) {
2767 Py_INCREF(item);
2768 Py_XDECREF(target->end_event_obj);
2769 target->end_event_obj = item;
2770 } else if (strcmp(event, "start-ns") == 0) {
2771 Py_INCREF(item);
2772 Py_XDECREF(target->start_ns_event_obj);
2773 target->start_ns_event_obj = item;
2774 EXPAT(SetNamespaceDeclHandler)(
2775 self->parser,
2776 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2777 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2778 );
2779 } else if (strcmp(event, "end-ns") == 0) {
2780 Py_INCREF(item);
2781 Py_XDECREF(target->end_ns_event_obj);
2782 target->end_ns_event_obj = item;
2783 EXPAT(SetNamespaceDeclHandler)(
2784 self->parser,
2785 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2786 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2787 );
2788 } else {
2789 PyErr_Format(
2790 PyExc_ValueError,
2791 "unknown event '%s'", event
2792 );
2793 return NULL;
2794 }
2795 }
2796
2797 Py_RETURN_NONE;
2798
2799 error:
2800 PyErr_SetString(
2801 PyExc_TypeError,
2802 "invalid event tuple"
2803 );
2804 return NULL;
2805}
2806
/* Method table for XMLParser objects.  Every method receives its
   arguments as a positional tuple (METH_VARARGS). */
static PyMethodDef xmlparser_methods[] = {
    {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
    {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
    {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
    {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
    {NULL, NULL} /* end-of-table marker */
};
2814
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002815static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002816xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002817{
Alexander Belopolskye239d232010-12-08 23:31:48 +00002818 if (PyUnicode_Check(nameobj)) {
2819 PyObject* res;
2820 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
2821 res = self->entity;
2822 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
2823 res = self->target;
2824 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
2825 return PyUnicode_FromFormat(
2826 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002827 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00002828 }
2829 else
2830 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002831
Alexander Belopolskye239d232010-12-08 23:31:48 +00002832 Py_INCREF(res);
2833 return res;
2834 }
2835 generic:
2836 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002837}
2838
/* Type object for XMLParser.  The initializer is positional, so the
   order of the slots below must not be changed.  The destructor, the
   attribute lookup hook and the method table are customized; all other
   slots are left at 0. */
static PyTypeObject XMLParser_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "XMLParser", sizeof(XMLParserObject), 0,
    /* methods */
    (destructor)xmlparser_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_reserved */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    (getattrofunc)xmlparser_getattro, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT, /* tp_flags */
    0, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    xmlparser_methods, /* tp_methods */
    0, /* tp_members */
};
2869
2870#endif
2871
2872/* ==================================================================== */
2873/* python module interface */
2874
/* Module-level functions.  "XMLParser" and "XMLTreeBuilder" are two
   names for the same factory and are only available when the module is
   built with USE_EXPAT. */
static PyMethodDef _functions[] = {
    {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
    {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
    {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
#if defined(USE_EXPAT)
    {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
    {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
#endif
    {NULL, NULL} /* end-of-table marker */
};
2885
Martin v. Löwis1a214512008-06-11 05:26:20 +00002886
/* Module definition passed to PyModule_Create.  The initializer is
   positional; the trailing NULL entries leave the remaining optional
   slots unused. */
static struct PyModuleDef _elementtreemodule = {
    PyModuleDef_HEAD_INIT,
    "_elementtree", /* m_name */
    NULL, /* m_doc */
    -1, /* m_size */
    _functions, /* m_methods */
    NULL,
    NULL,
    NULL,
    NULL
};
2898
Neal Norwitzf6657e62006-12-28 04:47:50 +00002899PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002900PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002901{
2902 PyObject* m;
2903 PyObject* g;
2904 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002905
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002906 /* Initialize object types */
2907 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002908 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002909 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002910 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002911#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002912 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002913 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002914#endif
2915
Martin v. Löwis1a214512008-06-11 05:26:20 +00002916 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002917 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002918 return NULL;
2919
2920 /* The code below requires that the module gets already added
2921 to sys.modules. */
2922 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002923 _elementtreemodule.m_name,
2924 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002925
2926 /* python glue code */
2927
2928 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002929 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002930 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002931
2932 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2933
2934 bootstrap = (
2935
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002936 "from copy import copy, deepcopy\n"
2937
2938 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002939 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002940 "except ImportError:\n"
2941 " import ElementTree\n"
2942 "ET = ElementTree\n"
2943 "del ElementTree\n"
2944
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002945 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002946
2947 "try:\n" /* check if copy works as is */
2948 " copy(cElementTree.Element('x'))\n"
2949 "except:\n"
2950 " def copyelement(elem):\n"
2951 " return elem\n"
2952
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002953 "class CommentProxy:\n"
2954 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002955 " element = cElementTree.Element(ET.Comment)\n"
2956 " element.text = text\n"
2957 " return element\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002958 " def __eq__(self, other):\n"
2959 " return ET.Comment == other\n"
2960 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002961
2962 "class ElementTree(ET.ElementTree):\n" /* public */
2963 " def parse(self, source, parser=None):\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00002964 " close_source = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002965 " if not hasattr(source, 'read'):\n"
2966 " source = open(source, 'rb')\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00002967 " close_source = True\n"
2968 " try:\n"
2969 " if parser is not None:\n"
2970 " while 1:\n"
2971 " data = source.read(65536)\n"
2972 " if not data:\n"
2973 " break\n"
2974 " parser.feed(data)\n"
2975 " self._root = parser.close()\n"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002976 " else:\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00002977 " parser = cElementTree.XMLParser()\n"
2978 " self._root = parser._parse(source)\n"
2979 " return self._root\n"
2980 " finally:\n"
2981 " if close_source:\n"
2982 " source.close()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002983 "cElementTree.ElementTree = ElementTree\n"
2984
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002985 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002986 " if tag == '*':\n"
2987 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002988 " if tag is None or node.tag == tag:\n"
2989 " yield node\n"
2990 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002991 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002992 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002993
2994 "def itertext(node):\n" /* helper */
2995 " if node.text:\n"
2996 " yield node.text\n"
2997 " for e in node:\n"
2998 " for s in e.itertext():\n"
2999 " yield s\n"
3000 " if e.tail:\n"
3001 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003002
3003 "def parse(source, parser=None):\n" /* public */
3004 " tree = ElementTree()\n"
3005 " tree.parse(source, parser)\n"
3006 " return tree\n"
3007 "cElementTree.parse = parse\n"
3008
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003009 "class iterparse:\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003010 " root = None\n"
3011 " def __init__(self, file, events=None):\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00003012 " self._close_file = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003013 " if not hasattr(file, 'read'):\n"
3014 " file = open(file, 'rb')\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00003015 " self._close_file = True\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016 " self._file = file\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003017 " self._events = []\n"
3018 " self._index = 0\n"
Florent Xicluna91d51932011-11-01 23:31:09 +01003019 " self._error = None\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003020 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003021 " b = cElementTree.TreeBuilder()\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003022 " self._parser = cElementTree.XMLParser(b)\n"
3023 " self._parser._setevents(self._events, events)\n"
3024 " def __next__(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003025 " while 1:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003026 " try:\n"
3027 " item = self._events[self._index]\n"
Florent Xicluna91d51932011-11-01 23:31:09 +01003028 " self._index += 1\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003029 " return item\n"
Florent Xicluna91d51932011-11-01 23:31:09 +01003030 " except IndexError:\n"
3031 " pass\n"
3032 " if self._error:\n"
3033 " e = self._error\n"
3034 " self._error = None\n"
3035 " raise e\n"
3036 " if self._parser is None:\n"
3037 " self.root = self._root\n"
3038 " if self._close_file:\n"
3039 " self._file.close()\n"
3040 " raise StopIteration\n"
3041 " # load event buffer\n"
3042 " del self._events[:]\n"
3043 " self._index = 0\n"
3044 " data = self._file.read(16384)\n"
3045 " if data:\n"
3046 " try:\n"
3047 " self._parser.feed(data)\n"
3048 " except SyntaxError as exc:\n"
3049 " self._error = exc\n"
3050 " else:\n"
3051 " self._root = self._parser.close()\n"
3052 " self._parser = None\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003053 " def __iter__(self):\n"
3054 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003055 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003056
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003057 "class PIProxy:\n"
3058 " def __call__(self, target, text=None):\n"
3059 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003060 " element.text = target\n"
3061 " if text:\n"
3062 " element.text = element.text + ' ' + text\n"
3063 " return element\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003064 " def __eq__(self, other):\n"
3065 " return ET.PI == other\n"
3066 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003067
3068 "def XML(text):\n" /* public */
3069 " parser = cElementTree.XMLParser()\n"
3070 " parser.feed(text)\n"
3071 " return parser.close()\n"
3072 "cElementTree.XML = cElementTree.fromstring = XML\n"
3073
3074 "def XMLID(text):\n" /* public */
3075 " tree = XML(text)\n"
3076 " ids = {}\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003077 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003078 " id = elem.get('id')\n"
3079 " if id:\n"
3080 " ids[id] = elem\n"
3081 " return tree, ids\n"
3082 "cElementTree.XMLID = XMLID\n"
3083
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003084 "try:\n"
3085 " register_namespace = ET.register_namespace\n"
3086 "except AttributeError:\n"
3087 " def register_namespace(prefix, uri):\n"
3088 " ET._namespace_map[uri] = prefix\n"
3089 "cElementTree.register_namespace = register_namespace\n"
3090
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003091 "cElementTree.dump = ET.dump\n"
3092 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3093 "cElementTree.iselement = ET.iselement\n"
3094 "cElementTree.QName = ET.QName\n"
3095 "cElementTree.tostring = ET.tostring\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003096 "cElementTree.fromstringlist = ET.fromstringlist\n"
3097 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003098 "cElementTree.VERSION = '" VERSION "'\n"
3099 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003100
3101 );
3102
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003103 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3104 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003105
3106 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3107
3108 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3109 if (elementtree_copyelement_obj) {
3110 /* reduce hack needed; enable reduce method */
3111 PyMethodDef* mp;
3112 for (mp = element_methods; mp->ml_name; mp++)
3113 if (mp->ml_meth == (PyCFunction) element_reduce) {
3114 mp->ml_name = "__reduce__";
3115 break;
3116 }
3117 } else
3118 PyErr_Clear();
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003119
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003120 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003121 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3122 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003123
3124#if defined(USE_PYEXPAT_CAPI)
3125 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003126 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3127 if (expat_capi) {
3128 /* check that it's usable */
3129 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3130 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3131 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3132 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3133 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3134 expat_capi = NULL;
3135 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003136#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003137
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003138 elementtree_parseerror_obj = PyErr_NewException(
3139 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3140 );
3141 Py_INCREF(elementtree_parseerror_obj);
3142 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3143
3144 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003145}