blob: 6373c4848f29d6ff64574543cd8bdccdeb3aca85 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000052#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000053
54/* -------------------------------------------------------------------- */
55/* configuration */
56
57/* Leave defined to include the expat-based XMLParser type */
58#define USE_EXPAT
59
Florent Xiclunaf15351d2010-03-13 23:24:31 +000060/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* #define USE_PYEXPAT_CAPI */
62
63/* An element can hold this many children without extra memory
64 allocations. */
65#define STATIC_CHILDREN 4
66
67/* For best performance, chose a value so that 80-90% of all nodes
68 have no more than the given number of children. Set this to zero
69 to minimize the size of the element structure itself (this only
70 helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
73 eight bytes. For the current version of cElementTree, this means
74 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
79#if 0
80static int memory = 0;
81#define ALLOC(size, comment)\
82do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
83#define RELEASE(size, comment)\
84do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
85#else
86#define ALLOC(size, comment)
87#define RELEASE(size, comment)
88#endif
89
90/* compiler tweaks */
91#if defined(_MSC_VER)
92#define LOCAL(type) static __inline type __fastcall
93#else
94#define LOCAL(type) static type
95#endif
96
Florent Xiclunaf15351d2010-03-13 23:24:31 +000097/* compatibility macros */
98#if (PY_VERSION_HEX < 0x02060000)
99#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
100#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
101#endif
102
103#if (PY_VERSION_HEX < 0x02050000)
104typedef int Py_ssize_t;
105#define lenfunc inquiry
106#endif
107
108#if (PY_VERSION_HEX < 0x02040000)
109#define PyDict_CheckExact PyDict_Check
110
111#if !defined(Py_RETURN_NONE)
112#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
113#endif
114#endif
115
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000116/* macros used to store 'join' flags in string object pointers. note
117 that all use of text and tail as object pointers must be wrapped in
118 JOIN_OBJ. see comments in the ElementObject definition for more
119 info. */
/* JOIN_GET(p): extract the join flag stored in the lowest bit of the
   tagged pointer p (evaluates to 0 or 1). */
120#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
/* JOIN_SET(p, flag): strip any existing flag from p (via JOIN_OBJ) and
   OR in the given flag; the result is a tagged void* pointer. */
121#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
/* JOIN_OBJ(p): clear the lowest bit of p and return the result as a
   plain PyObject* that is safe to dereference. */
122#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
123
124/* glue functions (see the init function for details) */
/* NOTE(review): not referenced in the part of the file reviewed here;
   presumably the ParseError exception object -- confirm in the init
   function. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000125static PyObject* elementtree_parseerror_obj;
/* NOTE(review): not referenced in the part of the file reviewed here. */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126static PyObject* elementtree_copyelement_obj;
/* callable invoked as (object, memo) by the deepcopy() helper */
127static PyObject* elementtree_deepcopy_obj;
/* callable invoked as (element, tag) by element_iter() */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000128static PyObject* elementtree_iter_obj;
/* callable invoked as (element,) by element_itertext() */
129static PyObject* elementtree_itertext_obj;
/* object whose find/findtext/findall/iterfind methods are called by the
   corresponding element methods */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130static PyObject* elementpath_obj;
131
132/* helpers */
133
134LOCAL(PyObject*)
135deepcopy(PyObject* object, PyObject* memo)
136{
137 /* do a deep copy of the given object */
138
139 PyObject* args;
140 PyObject* result;
141
142 if (!elementtree_deepcopy_obj) {
143 PyErr_SetString(
144 PyExc_RuntimeError,
145 "deepcopy helper not found"
146 );
147 return NULL;
148 }
149
150 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000151 if (!args)
152 return NULL;
153
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
155 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156
157 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158
159 Py_DECREF(args);
160
161 return result;
162}
163
164LOCAL(PyObject*)
165list_join(PyObject* list)
166{
167 /* join list elements (destroying the list in the process) */
168
169 PyObject* joiner;
170 PyObject* function;
171 PyObject* args;
172 PyObject* result;
173
174 switch (PyList_GET_SIZE(list)) {
175 case 0:
176 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000177 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000178 case 1:
179 result = PyList_GET_ITEM(list, 0);
180 Py_INCREF(result);
181 Py_DECREF(list);
182 return result;
183 }
184
185 /* two or more elements: slice out a suitable separator from the
186 first member, and use that to join the entire list */
187
188 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
189 if (!joiner)
190 return NULL;
191
192 function = PyObject_GetAttrString(joiner, "join");
193 if (!function) {
194 Py_DECREF(joiner);
195 return NULL;
196 }
197
198 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000199 if (!args)
200 return NULL;
201
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202 PyTuple_SET_ITEM(args, 0, list);
203
204 result = PyObject_CallObject(function, args);
205
206 Py_DECREF(args); /* also removes list */
207 Py_DECREF(function);
208 Py_DECREF(joiner);
209
210 return result;
211}
212
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000213/* -------------------------------------------------------------------- */
214/* the element type */
215
216typedef struct {
217
218 /* attributes (a dictionary object), or None if no attributes */
219 PyObject* attrib;
220
221 /* child elements */
222 int length; /* actual number of items */
223 int allocated; /* allocated items */
224
225 /* this either points to _children or to a malloced buffer */
226 PyObject* *children;
227
228 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100229
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230} ElementObjectExtra;
231
232typedef struct {
233 PyObject_HEAD
234
235 /* element tag (a string). */
236 PyObject* tag;
237
238 /* text before first child. note that this is a tagged pointer;
239 use JOIN_OBJ to get the object pointer. the join flag is used
240 to distinguish lists created by the tree builder from lists
241 assigned to the attribute by application code; the former
242 should be joined before being returned to the user, the latter
243 should be left intact. */
244 PyObject* text;
245
246 /* text after this element, in parent. note that this is a tagged
247 pointer; use JOIN_OBJ to get the object pointer. */
248 PyObject* tail;
249
250 ElementObjectExtra* extra;
251
252} ElementObject;
253
Neal Norwitz227b5332006-03-22 09:28:35 +0000254static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000255
Christian Heimes90aa7642007-12-19 02:45:37 +0000256#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000257
258/* -------------------------------------------------------------------- */
259/* element constructor and destructor */
260
261LOCAL(int)
262element_new_extra(ElementObject* self, PyObject* attrib)
263{
264 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
265 if (!self->extra)
266 return -1;
267
268 if (!attrib)
269 attrib = Py_None;
270
271 Py_INCREF(attrib);
272 self->extra->attrib = attrib;
273
274 self->extra->length = 0;
275 self->extra->allocated = STATIC_CHILDREN;
276 self->extra->children = self->extra->_children;
277
278 return 0;
279}
280
281LOCAL(void)
282element_dealloc_extra(ElementObject* self)
283{
284 int i;
285
286 Py_DECREF(self->extra->attrib);
287
288 for (i = 0; i < self->extra->length; i++)
289 Py_DECREF(self->extra->children[i]);
290
291 if (self->extra->children != self->extra->_children)
292 PyObject_Free(self->extra->children);
293
294 PyObject_Free(self->extra);
295}
296
297LOCAL(PyObject*)
298element_new(PyObject* tag, PyObject* attrib)
299{
300 ElementObject* self;
301
302 self = PyObject_New(ElementObject, &Element_Type);
303 if (self == NULL)
304 return NULL;
305
306 /* use None for empty dictionaries */
307 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
308 attrib = Py_None;
309
310 self->extra = NULL;
311
312 if (attrib != Py_None) {
313
Thomas Wouters477c8d52006-05-27 19:21:47 +0000314 if (element_new_extra(self, attrib) < 0) {
315 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000316 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000317 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000318
319 self->extra->length = 0;
320 self->extra->allocated = STATIC_CHILDREN;
321 self->extra->children = self->extra->_children;
322
323 }
324
325 Py_INCREF(tag);
326 self->tag = tag;
327
328 Py_INCREF(Py_None);
329 self->text = Py_None;
330
331 Py_INCREF(Py_None);
332 self->tail = Py_None;
333
334 ALLOC(sizeof(ElementObject), "create element");
335
336 return (PyObject*) self;
337}
338
339LOCAL(int)
340element_resize(ElementObject* self, int extra)
341{
342 int size;
343 PyObject* *children;
344
345 /* make sure self->children can hold the given number of extra
346 elements. set an exception and return -1 if allocation failed */
347
348 if (!self->extra)
349 element_new_extra(self, NULL);
350
351 size = self->extra->length + extra;
352
353 if (size > self->extra->allocated) {
354 /* use Python 2.4's list growth strategy */
355 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000356 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100357 * which needs at least 4 bytes.
358 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000359 * be safe.
360 */
361 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000362 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000363 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100364 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000365 * false alarm always assume at least one child to be safe.
366 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000367 children = PyObject_Realloc(self->extra->children,
368 size * sizeof(PyObject*));
369 if (!children)
370 goto nomemory;
371 } else {
372 children = PyObject_Malloc(size * sizeof(PyObject*));
373 if (!children)
374 goto nomemory;
375 /* copy existing children from static area to malloc buffer */
376 memcpy(children, self->extra->children,
377 self->extra->length * sizeof(PyObject*));
378 }
379 self->extra->children = children;
380 self->extra->allocated = size;
381 }
382
383 return 0;
384
385 nomemory:
386 PyErr_NoMemory();
387 return -1;
388}
389
390LOCAL(int)
391element_add_subelement(ElementObject* self, PyObject* element)
392{
393 /* add a child element to a parent */
394
395 if (element_resize(self, 1) < 0)
396 return -1;
397
398 Py_INCREF(element);
399 self->extra->children[self->extra->length] = element;
400
401 self->extra->length++;
402
403 return 0;
404}
405
406LOCAL(PyObject*)
407element_get_attrib(ElementObject* self)
408{
409 /* return borrowed reference to attrib dictionary */
410 /* note: this function assumes that the extra section exists */
411
412 PyObject* res = self->extra->attrib;
413
414 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000415 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000416 /* create missing dictionary */
417 res = PyDict_New();
418 if (!res)
419 return NULL;
420 self->extra->attrib = res;
421 }
422
423 return res;
424}
425
426LOCAL(PyObject*)
427element_get_text(ElementObject* self)
428{
429 /* return borrowed reference to text attribute */
430
431 PyObject* res = self->text;
432
433 if (JOIN_GET(res)) {
434 res = JOIN_OBJ(res);
435 if (PyList_CheckExact(res)) {
436 res = list_join(res);
437 if (!res)
438 return NULL;
439 self->text = res;
440 }
441 }
442
443 return res;
444}
445
446LOCAL(PyObject*)
447element_get_tail(ElementObject* self)
448{
449 /* return borrowed reference to text attribute */
450
451 PyObject* res = self->tail;
452
453 if (JOIN_GET(res)) {
454 res = JOIN_OBJ(res);
455 if (PyList_CheckExact(res)) {
456 res = list_join(res);
457 if (!res)
458 return NULL;
459 self->tail = res;
460 }
461 }
462
463 return res;
464}
465
466static PyObject*
467element(PyObject* self, PyObject* args, PyObject* kw)
468{
469 PyObject* elem;
470
471 PyObject* tag;
472 PyObject* attrib = NULL;
473 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
474 &PyDict_Type, &attrib))
475 return NULL;
476
477 if (attrib || kw) {
478 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
479 if (!attrib)
480 return NULL;
481 if (kw)
482 PyDict_Update(attrib, kw);
483 } else {
484 Py_INCREF(Py_None);
485 attrib = Py_None;
486 }
487
488 elem = element_new(tag, attrib);
489
490 Py_DECREF(attrib);
491
492 return elem;
493}
494
495static PyObject*
496subelement(PyObject* self, PyObject* args, PyObject* kw)
497{
498 PyObject* elem;
499
500 ElementObject* parent;
501 PyObject* tag;
502 PyObject* attrib = NULL;
503 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
504 &Element_Type, &parent, &tag,
505 &PyDict_Type, &attrib))
506 return NULL;
507
508 if (attrib || kw) {
509 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
510 if (!attrib)
511 return NULL;
512 if (kw)
513 PyDict_Update(attrib, kw);
514 } else {
515 Py_INCREF(Py_None);
516 attrib = Py_None;
517 }
518
519 elem = element_new(tag, attrib);
520
521 Py_DECREF(attrib);
522
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000523 if (element_add_subelement(parent, elem) < 0) {
524 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000525 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000526 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000527
528 return elem;
529}
530
531static void
532element_dealloc(ElementObject* self)
533{
534 if (self->extra)
535 element_dealloc_extra(self);
536
537 /* discard attributes */
538 Py_DECREF(self->tag);
539 Py_DECREF(JOIN_OBJ(self->text));
540 Py_DECREF(JOIN_OBJ(self->tail));
541
542 RELEASE(sizeof(ElementObject), "destroy element");
543
544 PyObject_Del(self);
545}
546
547/* -------------------------------------------------------------------- */
548/* methods (in alphabetical order) */
549
550static PyObject*
551element_append(ElementObject* self, PyObject* args)
552{
553 PyObject* element;
554 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
555 return NULL;
556
557 if (element_add_subelement(self, element) < 0)
558 return NULL;
559
560 Py_RETURN_NONE;
561}
562
563static PyObject*
564element_clear(ElementObject* self, PyObject* args)
565{
566 if (!PyArg_ParseTuple(args, ":clear"))
567 return NULL;
568
569 if (self->extra) {
570 element_dealloc_extra(self);
571 self->extra = NULL;
572 }
573
574 Py_INCREF(Py_None);
575 Py_DECREF(JOIN_OBJ(self->text));
576 self->text = Py_None;
577
578 Py_INCREF(Py_None);
579 Py_DECREF(JOIN_OBJ(self->tail));
580 self->tail = Py_None;
581
582 Py_RETURN_NONE;
583}
584
585static PyObject*
586element_copy(ElementObject* self, PyObject* args)
587{
588 int i;
589 ElementObject* element;
590
591 if (!PyArg_ParseTuple(args, ":__copy__"))
592 return NULL;
593
594 element = (ElementObject*) element_new(
595 self->tag, (self->extra) ? self->extra->attrib : Py_None
596 );
597 if (!element)
598 return NULL;
599
600 Py_DECREF(JOIN_OBJ(element->text));
601 element->text = self->text;
602 Py_INCREF(JOIN_OBJ(element->text));
603
604 Py_DECREF(JOIN_OBJ(element->tail));
605 element->tail = self->tail;
606 Py_INCREF(JOIN_OBJ(element->tail));
607
608 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100609
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000610 if (element_resize(element, self->extra->length) < 0) {
611 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000612 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000613 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614
615 for (i = 0; i < self->extra->length; i++) {
616 Py_INCREF(self->extra->children[i]);
617 element->extra->children[i] = self->extra->children[i];
618 }
619
620 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100621
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000622 }
623
624 return (PyObject*) element;
625}
626
627static PyObject*
628element_deepcopy(ElementObject* self, PyObject* args)
629{
630 int i;
631 ElementObject* element;
632 PyObject* tag;
633 PyObject* attrib;
634 PyObject* text;
635 PyObject* tail;
636 PyObject* id;
637
638 PyObject* memo;
639 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
640 return NULL;
641
642 tag = deepcopy(self->tag, memo);
643 if (!tag)
644 return NULL;
645
646 if (self->extra) {
647 attrib = deepcopy(self->extra->attrib, memo);
648 if (!attrib) {
649 Py_DECREF(tag);
650 return NULL;
651 }
652 } else {
653 Py_INCREF(Py_None);
654 attrib = Py_None;
655 }
656
657 element = (ElementObject*) element_new(tag, attrib);
658
659 Py_DECREF(tag);
660 Py_DECREF(attrib);
661
662 if (!element)
663 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100664
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665 text = deepcopy(JOIN_OBJ(self->text), memo);
666 if (!text)
667 goto error;
668 Py_DECREF(element->text);
669 element->text = JOIN_SET(text, JOIN_GET(self->text));
670
671 tail = deepcopy(JOIN_OBJ(self->tail), memo);
672 if (!tail)
673 goto error;
674 Py_DECREF(element->tail);
675 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
676
677 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100678
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000679 if (element_resize(element, self->extra->length) < 0)
680 goto error;
681
682 for (i = 0; i < self->extra->length; i++) {
683 PyObject* child = deepcopy(self->extra->children[i], memo);
684 if (!child) {
685 element->extra->length = i;
686 goto error;
687 }
688 element->extra->children[i] = child;
689 }
690
691 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100692
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693 }
694
695 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000696 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000697 if (!id)
698 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699
700 i = PyDict_SetItem(memo, id, (PyObject*) element);
701
702 Py_DECREF(id);
703
704 if (i < 0)
705 goto error;
706
707 return (PyObject*) element;
708
709 error:
710 Py_DECREF(element);
711 return NULL;
712}
713
714LOCAL(int)
715checkpath(PyObject* tag)
716{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000717 Py_ssize_t i;
718 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 /* check if a tag contains an xpath character */
721
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000722#define PATHCHAR(ch) \
723 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000724
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725 if (PyUnicode_Check(tag)) {
726 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
727 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
728 if (p[i] == '{')
729 check = 0;
730 else if (p[i] == '}')
731 check = 1;
732 else if (check && PATHCHAR(p[i]))
733 return 1;
734 }
735 return 0;
736 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000737 if (PyBytes_Check(tag)) {
738 char *p = PyBytes_AS_STRING(tag);
739 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000740 if (p[i] == '{')
741 check = 0;
742 else if (p[i] == '}')
743 check = 1;
744 else if (check && PATHCHAR(p[i]))
745 return 1;
746 }
747 return 0;
748 }
749
750 return 1; /* unknown type; might be path expression */
751}
752
753static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000754element_extend(ElementObject* self, PyObject* args)
755{
756 PyObject* seq;
757 Py_ssize_t i, seqlen = 0;
758
759 PyObject* seq_in;
760 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
761 return NULL;
762
763 seq = PySequence_Fast(seq_in, "");
764 if (!seq) {
765 PyErr_Format(
766 PyExc_TypeError,
767 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
768 );
769 return NULL;
770 }
771
772 seqlen = PySequence_Size(seq);
773 for (i = 0; i < seqlen; i++) {
774 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
775 if (element_add_subelement(self, element) < 0) {
776 Py_DECREF(seq);
777 return NULL;
778 }
779 }
780
781 Py_DECREF(seq);
782
783 Py_RETURN_NONE;
784}
785
786static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000787element_find(ElementObject* self, PyObject* args)
788{
789 int i;
790
791 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000792 PyObject* namespaces = Py_None;
793 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000794 return NULL;
795
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000796 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000797 return PyObject_CallMethod(
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000798 elementpath_obj, "find", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000799 );
800
801 if (!self->extra)
802 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100803
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000804 for (i = 0; i < self->extra->length; i++) {
805 PyObject* item = self->extra->children[i];
806 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000807 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000808 Py_INCREF(item);
809 return item;
810 }
811 }
812
813 Py_RETURN_NONE;
814}
815
816static PyObject*
817element_findtext(ElementObject* self, PyObject* args)
818{
819 int i;
820
821 PyObject* tag;
822 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000823 PyObject* namespaces = Py_None;
824 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000825 return NULL;
826
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000827 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000828 return PyObject_CallMethod(
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000829 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000830 );
831
832 if (!self->extra) {
833 Py_INCREF(default_value);
834 return default_value;
835 }
836
837 for (i = 0; i < self->extra->length; i++) {
838 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000839 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
840
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000841 PyObject* text = element_get_text(item);
842 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000843 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000844 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000845 return text;
846 }
847 }
848
849 Py_INCREF(default_value);
850 return default_value;
851}
852
853static PyObject*
854element_findall(ElementObject* self, PyObject* args)
855{
856 int i;
857 PyObject* out;
858
859 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000860 PyObject* namespaces = Py_None;
861 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000862 return NULL;
863
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000864 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000865 return PyObject_CallMethod(
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000866 elementpath_obj, "findall", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000867 );
868
869 out = PyList_New(0);
870 if (!out)
871 return NULL;
872
873 if (!self->extra)
874 return out;
875
876 for (i = 0; i < self->extra->length; i++) {
877 PyObject* item = self->extra->children[i];
878 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000879 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000880 if (PyList_Append(out, item) < 0) {
881 Py_DECREF(out);
882 return NULL;
883 }
884 }
885 }
886
887 return out;
888}
889
890static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000891element_iterfind(ElementObject* self, PyObject* args)
892{
893 PyObject* tag;
894 PyObject* namespaces = Py_None;
895 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
896 return NULL;
897
898 return PyObject_CallMethod(
899 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
900 );
901}
902
903static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000904element_get(ElementObject* self, PyObject* args)
905{
906 PyObject* value;
907
908 PyObject* key;
909 PyObject* default_value = Py_None;
910 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
911 return NULL;
912
913 if (!self->extra || self->extra->attrib == Py_None)
914 value = default_value;
915 else {
916 value = PyDict_GetItem(self->extra->attrib, key);
917 if (!value)
918 value = default_value;
919 }
920
921 Py_INCREF(value);
922 return value;
923}
924
925static PyObject*
926element_getchildren(ElementObject* self, PyObject* args)
927{
928 int i;
929 PyObject* list;
930
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000931 /* FIXME: report as deprecated? */
932
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000933 if (!PyArg_ParseTuple(args, ":getchildren"))
934 return NULL;
935
936 if (!self->extra)
937 return PyList_New(0);
938
939 list = PyList_New(self->extra->length);
940 if (!list)
941 return NULL;
942
943 for (i = 0; i < self->extra->length; i++) {
944 PyObject* item = self->extra->children[i];
945 Py_INCREF(item);
946 PyList_SET_ITEM(list, i, item);
947 }
948
949 return list;
950}
951
952static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000953element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000954{
955 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100956
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000957 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000958 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000959 return NULL;
960
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000961 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000962 PyErr_SetString(
963 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000964 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000965 );
966 return NULL;
967 }
968
969 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000970 if (!args)
971 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000972
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000973 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
974 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
975
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000976 result = PyObject_CallObject(elementtree_iter_obj, args);
977
978 Py_DECREF(args);
979
980 return result;
981}
982
983
984static PyObject*
985element_itertext(ElementObject* self, PyObject* args)
986{
987 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100988
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000989 if (!PyArg_ParseTuple(args, ":itertext"))
990 return NULL;
991
992 if (!elementtree_itertext_obj) {
993 PyErr_SetString(
994 PyExc_RuntimeError,
995 "itertext helper not found"
996 );
997 return NULL;
998 }
999
1000 args = PyTuple_New(1);
1001 if (!args)
1002 return NULL;
1003
1004 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1005
1006 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001007
1008 Py_DECREF(args);
1009
1010 return result;
1011}
1012
1013static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001014element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001015{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001016 ElementObject* self = (ElementObject*) self_;
1017
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001018 if (!self->extra || index < 0 || index >= self->extra->length) {
1019 PyErr_SetString(
1020 PyExc_IndexError,
1021 "child index out of range"
1022 );
1023 return NULL;
1024 }
1025
1026 Py_INCREF(self->extra->children[index]);
1027 return self->extra->children[index];
1028}
1029
1030static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001031element_insert(ElementObject* self, PyObject* args)
1032{
1033 int i;
1034
1035 int index;
1036 PyObject* element;
1037 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1038 &Element_Type, &element))
1039 return NULL;
1040
1041 if (!self->extra)
1042 element_new_extra(self, NULL);
1043
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001044 if (index < 0) {
1045 index += self->extra->length;
1046 if (index < 0)
1047 index = 0;
1048 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001049 if (index > self->extra->length)
1050 index = self->extra->length;
1051
1052 if (element_resize(self, 1) < 0)
1053 return NULL;
1054
1055 for (i = self->extra->length; i > index; i--)
1056 self->extra->children[i] = self->extra->children[i-1];
1057
1058 Py_INCREF(element);
1059 self->extra->children[index] = element;
1060
1061 self->extra->length++;
1062
1063 Py_RETURN_NONE;
1064}
1065
1066static PyObject*
1067element_items(ElementObject* self, PyObject* args)
1068{
1069 if (!PyArg_ParseTuple(args, ":items"))
1070 return NULL;
1071
1072 if (!self->extra || self->extra->attrib == Py_None)
1073 return PyList_New(0);
1074
1075 return PyDict_Items(self->extra->attrib);
1076}
1077
1078static PyObject*
1079element_keys(ElementObject* self, PyObject* args)
1080{
1081 if (!PyArg_ParseTuple(args, ":keys"))
1082 return NULL;
1083
1084 if (!self->extra || self->extra->attrib == Py_None)
1085 return PyList_New(0);
1086
1087 return PyDict_Keys(self->extra->attrib);
1088}
1089
Martin v. Löwis18e16552006-02-15 17:27:45 +00001090static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001091element_length(ElementObject* self)
1092{
1093 if (!self->extra)
1094 return 0;
1095
1096 return self->extra->length;
1097}
1098
1099static PyObject*
1100element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1101{
1102 PyObject* elem;
1103
1104 PyObject* tag;
1105 PyObject* attrib;
1106 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1107 return NULL;
1108
1109 attrib = PyDict_Copy(attrib);
1110 if (!attrib)
1111 return NULL;
1112
1113 elem = element_new(tag, attrib);
1114
1115 Py_DECREF(attrib);
1116
1117 return elem;
1118}
1119
1120static PyObject*
1121element_reduce(ElementObject* self, PyObject* args)
1122{
1123 if (!PyArg_ParseTuple(args, ":__reduce__"))
1124 return NULL;
1125
1126 /* Hack alert: This method is used to work around a __copy__
1127 problem on certain 2.3 and 2.4 versions. To save time and
1128 simplify the code, we create the copy in here, and use a dummy
1129 copyelement helper to trick the copy module into doing the
1130 right thing. */
1131
1132 if (!elementtree_copyelement_obj) {
1133 PyErr_SetString(
1134 PyExc_RuntimeError,
1135 "copyelement helper not found"
1136 );
1137 return NULL;
1138 }
1139
1140 return Py_BuildValue(
1141 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1142 );
1143}
1144
1145static PyObject*
1146element_remove(ElementObject* self, PyObject* args)
1147{
1148 int i;
1149
1150 PyObject* element;
1151 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1152 return NULL;
1153
1154 if (!self->extra) {
1155 /* element has no children, so raise exception */
1156 PyErr_SetString(
1157 PyExc_ValueError,
1158 "list.remove(x): x not in list"
1159 );
1160 return NULL;
1161 }
1162
1163 for (i = 0; i < self->extra->length; i++) {
1164 if (self->extra->children[i] == element)
1165 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001166 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001167 break;
1168 }
1169
1170 if (i == self->extra->length) {
1171 /* element is not in children, so raise exception */
1172 PyErr_SetString(
1173 PyExc_ValueError,
1174 "list.remove(x): x not in list"
1175 );
1176 return NULL;
1177 }
1178
1179 Py_DECREF(self->extra->children[i]);
1180
1181 self->extra->length--;
1182
1183 for (; i < self->extra->length; i++)
1184 self->extra->children[i] = self->extra->children[i+1];
1185
1186 Py_RETURN_NONE;
1187}
1188
1189static PyObject*
1190element_repr(ElementObject* self)
1191{
Walter Dörwald7569dfe2007-05-19 21:49:49 +00001192 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001193}
1194
1195static PyObject*
1196element_set(ElementObject* self, PyObject* args)
1197{
1198 PyObject* attrib;
1199
1200 PyObject* key;
1201 PyObject* value;
1202 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1203 return NULL;
1204
1205 if (!self->extra)
1206 element_new_extra(self, NULL);
1207
1208 attrib = element_get_attrib(self);
1209 if (!attrib)
1210 return NULL;
1211
1212 if (PyDict_SetItem(attrib, key, value) < 0)
1213 return NULL;
1214
1215 Py_RETURN_NONE;
1216}
1217
1218static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001219element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001220{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001221 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001222 int i;
1223 PyObject* old;
1224
1225 if (!self->extra || index < 0 || index >= self->extra->length) {
1226 PyErr_SetString(
1227 PyExc_IndexError,
1228 "child assignment index out of range");
1229 return -1;
1230 }
1231
1232 old = self->extra->children[index];
1233
1234 if (item) {
1235 Py_INCREF(item);
1236 self->extra->children[index] = item;
1237 } else {
1238 self->extra->length--;
1239 for (i = index; i < self->extra->length; i++)
1240 self->extra->children[i] = self->extra->children[i+1];
1241 }
1242
1243 Py_DECREF(old);
1244
1245 return 0;
1246}
1247
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001248static PyObject*
1249element_subscr(PyObject* self_, PyObject* item)
1250{
1251 ElementObject* self = (ElementObject*) self_;
1252
1253#if (PY_VERSION_HEX < 0x02050000)
1254 if (PyInt_Check(item) || PyLong_Check(item)) {
1255 long i = PyInt_AsLong(item);
1256#else
1257 if (PyIndex_Check(item)) {
1258 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1259#endif
1260
1261 if (i == -1 && PyErr_Occurred()) {
1262 return NULL;
1263 }
1264 if (i < 0 && self->extra)
1265 i += self->extra->length;
1266 return element_getitem(self_, i);
1267 }
1268 else if (PySlice_Check(item)) {
1269 Py_ssize_t start, stop, step, slicelen, cur, i;
1270 PyObject* list;
1271
1272 if (!self->extra)
1273 return PyList_New(0);
1274
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001275 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001276 self->extra->length,
1277 &start, &stop, &step, &slicelen) < 0) {
1278 return NULL;
1279 }
1280
1281 if (slicelen <= 0)
1282 return PyList_New(0);
1283 else {
1284 list = PyList_New(slicelen);
1285 if (!list)
1286 return NULL;
1287
1288 for (cur = start, i = 0; i < slicelen;
1289 cur += step, i++) {
1290 PyObject* item = self->extra->children[cur];
1291 Py_INCREF(item);
1292 PyList_SET_ITEM(list, i, item);
1293 }
1294
1295 return list;
1296 }
1297 }
1298 else {
1299 PyErr_SetString(PyExc_TypeError,
1300 "element indices must be integers");
1301 return NULL;
1302 }
1303}
1304
1305static int
1306element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1307{
1308 ElementObject* self = (ElementObject*) self_;
1309
1310#if (PY_VERSION_HEX < 0x02050000)
1311 if (PyInt_Check(item) || PyLong_Check(item)) {
1312 long i = PyInt_AsLong(item);
1313#else
1314 if (PyIndex_Check(item)) {
1315 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1316#endif
1317
1318 if (i == -1 && PyErr_Occurred()) {
1319 return -1;
1320 }
1321 if (i < 0 && self->extra)
1322 i += self->extra->length;
1323 return element_setitem(self_, i, value);
1324 }
1325 else if (PySlice_Check(item)) {
1326 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1327
1328 PyObject* recycle = NULL;
1329 PyObject* seq = NULL;
1330
1331 if (!self->extra)
1332 element_new_extra(self, NULL);
1333
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001334 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001335 self->extra->length,
1336 &start, &stop, &step, &slicelen) < 0) {
1337 return -1;
1338 }
1339
1340 if (value == NULL)
1341 newlen = 0;
1342 else {
1343 seq = PySequence_Fast(value, "");
1344 if (!seq) {
1345 PyErr_Format(
1346 PyExc_TypeError,
1347 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1348 );
1349 return -1;
1350 }
1351 newlen = PySequence_Size(seq);
1352 }
1353
1354 if (step != 1 && newlen != slicelen)
1355 {
1356 PyErr_Format(PyExc_ValueError,
1357#if (PY_VERSION_HEX < 0x02050000)
1358 "attempt to assign sequence of size %d "
1359 "to extended slice of size %d",
1360#else
1361 "attempt to assign sequence of size %zd "
1362 "to extended slice of size %zd",
1363#endif
1364 newlen, slicelen
1365 );
1366 return -1;
1367 }
1368
1369
1370 /* Resize before creating the recycle bin, to prevent refleaks. */
1371 if (newlen > slicelen) {
1372 if (element_resize(self, newlen - slicelen) < 0) {
1373 if (seq) {
1374 Py_DECREF(seq);
1375 }
1376 return -1;
1377 }
1378 }
1379
1380 if (slicelen > 0) {
1381 /* to avoid recursive calls to this method (via decref), move
1382 old items to the recycle bin here, and get rid of them when
1383 we're done modifying the element */
1384 recycle = PyList_New(slicelen);
1385 if (!recycle) {
1386 if (seq) {
1387 Py_DECREF(seq);
1388 }
1389 return -1;
1390 }
1391 for (cur = start, i = 0; i < slicelen;
1392 cur += step, i++)
1393 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1394 }
1395
1396 if (newlen < slicelen) {
1397 /* delete slice */
1398 for (i = stop; i < self->extra->length; i++)
1399 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1400 } else if (newlen > slicelen) {
1401 /* insert slice */
1402 for (i = self->extra->length-1; i >= stop; i--)
1403 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1404 }
1405
1406 /* replace the slice */
1407 for (cur = start, i = 0; i < newlen;
1408 cur += step, i++) {
1409 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1410 Py_INCREF(element);
1411 self->extra->children[cur] = element;
1412 }
1413
1414 self->extra->length += newlen - slicelen;
1415
1416 if (seq) {
1417 Py_DECREF(seq);
1418 }
1419
1420 /* discard the recycle bin, and everything in it */
1421 Py_XDECREF(recycle);
1422
1423 return 0;
1424 }
1425 else {
1426 PyErr_SetString(PyExc_TypeError,
1427 "element indices must be integers");
1428 return -1;
1429 }
1430}
1431
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001432static PyMethodDef element_methods[] = {
1433
1434 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1435
1436 {"get", (PyCFunction) element_get, METH_VARARGS},
1437 {"set", (PyCFunction) element_set, METH_VARARGS},
1438
1439 {"find", (PyCFunction) element_find, METH_VARARGS},
1440 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1441 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1442
1443 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001444 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001445 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1446 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1447
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001448 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1449 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1450 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1451
1452 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001453 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1454
1455 {"items", (PyCFunction) element_items, METH_VARARGS},
1456 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1457
1458 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1459
1460 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1461 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1462
1463 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1464 C objects correctly, so we have to fake it using a __reduce__-
1465 based hack (see the element_reduce implementation above for
1466 details). */
1467
1468 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1469 using a runtime test to figure out if we need to fake things
1470 or now (see the init code below). The following entry is
1471 enabled only if the hack is needed. */
1472
1473 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1474
1475 {NULL, NULL}
1476};
1477
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001478static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001479element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001480{
1481 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001482 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001483
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001484 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001485 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001486
Alexander Belopolskye239d232010-12-08 23:31:48 +00001487 if (name == NULL)
1488 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001489
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001490 /* handle common attributes first */
1491 if (strcmp(name, "tag") == 0) {
1492 res = self->tag;
1493 Py_INCREF(res);
1494 return res;
1495 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001496 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001497 Py_INCREF(res);
1498 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001499 }
1500
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001501 /* methods */
1502 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1503 if (res)
1504 return res;
1505
1506 /* less common attributes */
1507 if (strcmp(name, "tail") == 0) {
1508 PyErr_Clear();
1509 res = element_get_tail(self);
1510 } else if (strcmp(name, "attrib") == 0) {
1511 PyErr_Clear();
1512 if (!self->extra)
1513 element_new_extra(self, NULL);
1514 res = element_get_attrib(self);
1515 }
1516
1517 if (!res)
1518 return NULL;
1519
1520 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001521 return res;
1522}
1523
1524static int
1525element_setattr(ElementObject* self, const char* name, PyObject* value)
1526{
1527 if (value == NULL) {
1528 PyErr_SetString(
1529 PyExc_AttributeError,
1530 "can't delete element attributes"
1531 );
1532 return -1;
1533 }
1534
1535 if (strcmp(name, "tag") == 0) {
1536 Py_DECREF(self->tag);
1537 self->tag = value;
1538 Py_INCREF(self->tag);
1539 } else if (strcmp(name, "text") == 0) {
1540 Py_DECREF(JOIN_OBJ(self->text));
1541 self->text = value;
1542 Py_INCREF(self->text);
1543 } else if (strcmp(name, "tail") == 0) {
1544 Py_DECREF(JOIN_OBJ(self->tail));
1545 self->tail = value;
1546 Py_INCREF(self->tail);
1547 } else if (strcmp(name, "attrib") == 0) {
1548 if (!self->extra)
1549 element_new_extra(self, NULL);
1550 Py_DECREF(self->extra->attrib);
1551 self->extra->attrib = value;
1552 Py_INCREF(self->extra->attrib);
1553 } else {
1554 PyErr_SetString(PyExc_AttributeError, name);
1555 return -1;
1556 }
1557
1558 return 0;
1559}
1560
1561static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001562 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001563 0, /* sq_concat */
1564 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001565 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001566 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001567 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001568 0,
1569};
1570
1571static PyMappingMethods element_as_mapping = {
1572 (lenfunc) element_length,
1573 (binaryfunc) element_subscr,
1574 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001575};
1576
Neal Norwitz227b5332006-03-22 09:28:35 +00001577static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001578 PyVarObject_HEAD_INIT(NULL, 0)
1579 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001580 /* methods */
1581 (destructor)element_dealloc, /* tp_dealloc */
1582 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001583 0, /* tp_getattr */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001584 (setattrfunc)element_setattr, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00001585 0, /* tp_reserved */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001586 (reprfunc)element_repr, /* tp_repr */
1587 0, /* tp_as_number */
1588 &element_as_sequence, /* tp_as_sequence */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001589 &element_as_mapping, /* tp_as_mapping */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001590 0, /* tp_hash */
1591 0, /* tp_call */
1592 0, /* tp_str */
1593 (getattrofunc)element_getattro, /* tp_getattro */
1594 0, /* tp_setattro */
1595 0, /* tp_as_buffer */
1596 Py_TPFLAGS_DEFAULT, /* tp_flags */
1597 0, /* tp_doc */
1598 0, /* tp_traverse */
1599 0, /* tp_clear */
1600 0, /* tp_richcompare */
1601 0, /* tp_weaklistoffset */
1602 0, /* tp_iter */
1603 0, /* tp_iternext */
1604 element_methods, /* tp_methods */
1605 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001606};
1607
1608/* ==================================================================== */
1609/* the tree builder type */
1610
1611typedef struct {
1612 PyObject_HEAD
1613
1614 PyObject* root; /* root node (first created node) */
1615
1616 ElementObject* this; /* current node */
1617 ElementObject* last; /* most recently created node */
1618
1619 PyObject* data; /* data collector (string or list), or NULL */
1620
1621 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001622 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001623
1624 /* element tracing */
1625 PyObject* events; /* list of events, or NULL if not collecting */
1626 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1627 PyObject* end_event_obj;
1628 PyObject* start_ns_event_obj;
1629 PyObject* end_ns_event_obj;
1630
1631} TreeBuilderObject;
1632
Neal Norwitz227b5332006-03-22 09:28:35 +00001633static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001634
Christian Heimes90aa7642007-12-19 02:45:37 +00001635#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001636
1637/* -------------------------------------------------------------------- */
1638/* constructor and destructor */
1639
1640LOCAL(PyObject*)
1641treebuilder_new(void)
1642{
1643 TreeBuilderObject* self;
1644
1645 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1646 if (self == NULL)
1647 return NULL;
1648
1649 self->root = NULL;
1650
1651 Py_INCREF(Py_None);
1652 self->this = (ElementObject*) Py_None;
1653
1654 Py_INCREF(Py_None);
1655 self->last = (ElementObject*) Py_None;
1656
1657 self->data = NULL;
1658
1659 self->stack = PyList_New(20);
1660 self->index = 0;
1661
1662 self->events = NULL;
1663 self->start_event_obj = self->end_event_obj = NULL;
1664 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1665
1666 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1667
1668 return (PyObject*) self;
1669}
1670
1671static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001672treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001673{
1674 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1675 return NULL;
1676
1677 return treebuilder_new();
1678}
1679
1680static void
1681treebuilder_dealloc(TreeBuilderObject* self)
1682{
1683 Py_XDECREF(self->end_ns_event_obj);
1684 Py_XDECREF(self->start_ns_event_obj);
1685 Py_XDECREF(self->end_event_obj);
1686 Py_XDECREF(self->start_event_obj);
1687 Py_XDECREF(self->events);
1688 Py_DECREF(self->stack);
1689 Py_XDECREF(self->data);
1690 Py_DECREF(self->last);
1691 Py_DECREF(self->this);
1692 Py_XDECREF(self->root);
1693
1694 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1695
1696 PyObject_Del(self);
1697}
1698
1699/* -------------------------------------------------------------------- */
1700/* handlers */
1701
1702LOCAL(PyObject*)
1703treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1704 PyObject* standalone)
1705{
1706 Py_RETURN_NONE;
1707}
1708
1709LOCAL(PyObject*)
1710treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1711 PyObject* attrib)
1712{
1713 PyObject* node;
1714 PyObject* this;
1715
1716 if (self->data) {
1717 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001718 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001719 self->last->text = JOIN_SET(
1720 self->data, PyList_CheckExact(self->data)
1721 );
1722 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001723 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001724 self->last->tail = JOIN_SET(
1725 self->data, PyList_CheckExact(self->data)
1726 );
1727 }
1728 self->data = NULL;
1729 }
1730
1731 node = element_new(tag, attrib);
1732 if (!node)
1733 return NULL;
1734
1735 this = (PyObject*) self->this;
1736
1737 if (this != Py_None) {
1738 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001739 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001740 } else {
1741 if (self->root) {
1742 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001743 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001744 "multiple elements on top level"
1745 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001746 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001747 }
1748 Py_INCREF(node);
1749 self->root = node;
1750 }
1751
1752 if (self->index < PyList_GET_SIZE(self->stack)) {
1753 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001754 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001755 Py_INCREF(this);
1756 } else {
1757 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001758 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001759 }
1760 self->index++;
1761
1762 Py_DECREF(this);
1763 Py_INCREF(node);
1764 self->this = (ElementObject*) node;
1765
1766 Py_DECREF(self->last);
1767 Py_INCREF(node);
1768 self->last = (ElementObject*) node;
1769
1770 if (self->start_event_obj) {
1771 PyObject* res;
1772 PyObject* action = self->start_event_obj;
1773 res = PyTuple_New(2);
1774 if (res) {
1775 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1776 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1777 PyList_Append(self->events, res);
1778 Py_DECREF(res);
1779 } else
1780 PyErr_Clear(); /* FIXME: propagate error */
1781 }
1782
1783 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001784
1785 error:
1786 Py_DECREF(node);
1787 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001788}
1789
1790LOCAL(PyObject*)
1791treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1792{
1793 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001794 if (self->last == (ElementObject*) Py_None) {
1795 /* ignore calls to data before the first call to start */
1796 Py_RETURN_NONE;
1797 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001798 /* store the first item as is */
1799 Py_INCREF(data); self->data = data;
1800 } else {
1801 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001802 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1803 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001804 /* expat often generates single character data sections; handle
1805 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001806 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1807 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001808 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001809 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001810 } else if (PyList_CheckExact(self->data)) {
1811 if (PyList_Append(self->data, data) < 0)
1812 return NULL;
1813 } else {
1814 PyObject* list = PyList_New(2);
1815 if (!list)
1816 return NULL;
1817 PyList_SET_ITEM(list, 0, self->data);
1818 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1819 self->data = list;
1820 }
1821 }
1822
1823 Py_RETURN_NONE;
1824}
1825
1826LOCAL(PyObject*)
1827treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1828{
1829 PyObject* item;
1830
1831 if (self->data) {
1832 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001833 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001834 self->last->text = JOIN_SET(
1835 self->data, PyList_CheckExact(self->data)
1836 );
1837 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001838 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001839 self->last->tail = JOIN_SET(
1840 self->data, PyList_CheckExact(self->data)
1841 );
1842 }
1843 self->data = NULL;
1844 }
1845
1846 if (self->index == 0) {
1847 PyErr_SetString(
1848 PyExc_IndexError,
1849 "pop from empty stack"
1850 );
1851 return NULL;
1852 }
1853
1854 self->index--;
1855
1856 item = PyList_GET_ITEM(self->stack, self->index);
1857 Py_INCREF(item);
1858
1859 Py_DECREF(self->last);
1860
1861 self->last = (ElementObject*) self->this;
1862 self->this = (ElementObject*) item;
1863
1864 if (self->end_event_obj) {
1865 PyObject* res;
1866 PyObject* action = self->end_event_obj;
1867 PyObject* node = (PyObject*) self->last;
1868 res = PyTuple_New(2);
1869 if (res) {
1870 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1871 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1872 PyList_Append(self->events, res);
1873 Py_DECREF(res);
1874 } else
1875 PyErr_Clear(); /* FIXME: propagate error */
1876 }
1877
1878 Py_INCREF(self->last);
1879 return (PyObject*) self->last;
1880}
1881
1882LOCAL(void)
1883treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001884 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001885{
1886 PyObject* res;
1887 PyObject* action;
1888 PyObject* parcel;
1889
1890 if (!self->events)
1891 return;
1892
1893 if (start) {
1894 if (!self->start_ns_event_obj)
1895 return;
1896 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001897 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001898 if (!parcel)
1899 return;
1900 Py_INCREF(action);
1901 } else {
1902 if (!self->end_ns_event_obj)
1903 return;
1904 action = self->end_ns_event_obj;
1905 Py_INCREF(action);
1906 parcel = Py_None;
1907 Py_INCREF(parcel);
1908 }
1909
1910 res = PyTuple_New(2);
1911
1912 if (res) {
1913 PyTuple_SET_ITEM(res, 0, action);
1914 PyTuple_SET_ITEM(res, 1, parcel);
1915 PyList_Append(self->events, res);
1916 Py_DECREF(res);
1917 } else
1918 PyErr_Clear(); /* FIXME: propagate error */
1919}
1920
1921/* -------------------------------------------------------------------- */
1922/* methods (in alphabetical order) */
1923
1924static PyObject*
1925treebuilder_data(TreeBuilderObject* self, PyObject* args)
1926{
1927 PyObject* data;
1928 if (!PyArg_ParseTuple(args, "O:data", &data))
1929 return NULL;
1930
1931 return treebuilder_handle_data(self, data);
1932}
1933
1934static PyObject*
1935treebuilder_end(TreeBuilderObject* self, PyObject* args)
1936{
1937 PyObject* tag;
1938 if (!PyArg_ParseTuple(args, "O:end", &tag))
1939 return NULL;
1940
1941 return treebuilder_handle_end(self, tag);
1942}
1943
1944LOCAL(PyObject*)
1945treebuilder_done(TreeBuilderObject* self)
1946{
1947 PyObject* res;
1948
1949 /* FIXME: check stack size? */
1950
1951 if (self->root)
1952 res = self->root;
1953 else
1954 res = Py_None;
1955
1956 Py_INCREF(res);
1957 return res;
1958}
1959
1960static PyObject*
1961treebuilder_close(TreeBuilderObject* self, PyObject* args)
1962{
1963 if (!PyArg_ParseTuple(args, ":close"))
1964 return NULL;
1965
1966 return treebuilder_done(self);
1967}
1968
1969static PyObject*
1970treebuilder_start(TreeBuilderObject* self, PyObject* args)
1971{
1972 PyObject* tag;
1973 PyObject* attrib = Py_None;
1974 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1975 return NULL;
1976
1977 return treebuilder_handle_start(self, tag, attrib);
1978}
1979
1980static PyObject*
1981treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1982{
1983 PyObject* encoding;
1984 PyObject* standalone;
1985 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1986 return NULL;
1987
1988 return treebuilder_handle_xml(self, encoding, standalone);
1989}
1990
1991static PyMethodDef treebuilder_methods[] = {
1992 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1993 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1994 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1995 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1996 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1997 {NULL, NULL}
1998};
1999
Neal Norwitz227b5332006-03-22 09:28:35 +00002000static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002001 PyVarObject_HEAD_INIT(NULL, 0)
2002 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002003 /* methods */
2004 (destructor)treebuilder_dealloc, /* tp_dealloc */
2005 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002006 0, /* tp_getattr */
2007 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002008 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002009 0, /* tp_repr */
2010 0, /* tp_as_number */
2011 0, /* tp_as_sequence */
2012 0, /* tp_as_mapping */
2013 0, /* tp_hash */
2014 0, /* tp_call */
2015 0, /* tp_str */
2016 0, /* tp_getattro */
2017 0, /* tp_setattro */
2018 0, /* tp_as_buffer */
2019 Py_TPFLAGS_DEFAULT, /* tp_flags */
2020 0, /* tp_doc */
2021 0, /* tp_traverse */
2022 0, /* tp_clear */
2023 0, /* tp_richcompare */
2024 0, /* tp_weaklistoffset */
2025 0, /* tp_iter */
2026 0, /* tp_iternext */
2027 treebuilder_methods, /* tp_methods */
2028 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002029};
2030
2031/* ==================================================================== */
2032/* the expat interface */
2033
2034#if defined(USE_EXPAT)
2035
2036#include "expat.h"
2037
2038#if defined(USE_PYEXPAT_CAPI)
2039#include "pyexpat.h"
2040static struct PyExpat_CAPI* expat_capi;
2041#define EXPAT(func) (expat_capi->func)
2042#else
2043#define EXPAT(func) (XML_##func)
2044#endif
2045
2046typedef struct {
2047 PyObject_HEAD
2048
2049 XML_Parser parser;
2050
2051 PyObject* target;
2052 PyObject* entity;
2053
2054 PyObject* names;
2055
2056 PyObject* handle_xml;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002057
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002058 PyObject* handle_start;
2059 PyObject* handle_data;
2060 PyObject* handle_end;
2061
2062 PyObject* handle_comment;
2063 PyObject* handle_pi;
2064
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002065 PyObject* handle_close;
2066
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002067} XMLParserObject;
2068
Neal Norwitz227b5332006-03-22 09:28:35 +00002069static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002070
2071/* helpers */
2072
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002073LOCAL(PyObject*)
2074makeuniversal(XMLParserObject* self, const char* string)
2075{
2076 /* convert a UTF-8 tag/attribute name from the expat parser
2077 to a universal name string */
2078
2079 int size = strlen(string);
2080 PyObject* key;
2081 PyObject* value;
2082
2083 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002084 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002085 if (!key)
2086 return NULL;
2087
2088 value = PyDict_GetItem(self->names, key);
2089
2090 if (value) {
2091 Py_INCREF(value);
2092 } else {
2093 /* new name. convert to universal name, and decode as
2094 necessary */
2095
2096 PyObject* tag;
2097 char* p;
2098 int i;
2099
2100 /* look for namespace separator */
2101 for (i = 0; i < size; i++)
2102 if (string[i] == '}')
2103 break;
2104 if (i != size) {
2105 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002106 tag = PyBytes_FromStringAndSize(NULL, size+1);
2107 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002108 p[0] = '{';
2109 memcpy(p+1, string, size);
2110 size++;
2111 } else {
2112 /* plain name; use key as tag */
2113 Py_INCREF(key);
2114 tag = key;
2115 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002116
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002117 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002118 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002119 value = PyUnicode_DecodeUTF8(p, size, "strict");
2120 Py_DECREF(tag);
2121 if (!value) {
2122 Py_DECREF(key);
2123 return NULL;
2124 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002125
2126 /* add to names dictionary */
2127 if (PyDict_SetItem(self->names, key, value) < 0) {
2128 Py_DECREF(key);
2129 Py_DECREF(value);
2130 return NULL;
2131 }
2132 }
2133
2134 Py_DECREF(key);
2135 return value;
2136}
2137
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002138static void
2139expat_set_error(const char* message, int line, int column)
2140{
Victor Stinner499dfcf2011-03-21 13:26:24 +01002141 PyObject *errmsg, *error, *position;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002142
Victor Stinner499dfcf2011-03-21 13:26:24 +01002143 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
2144 message, line, column);
2145 if (errmsg == NULL)
2146 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002147
Victor Stinner499dfcf2011-03-21 13:26:24 +01002148 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2149 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002150 if (!error)
2151 return;
2152
2153 /* add position attribute */
2154 position = Py_BuildValue("(ii)", line, column);
2155 if (!position) {
2156 Py_DECREF(error);
2157 return;
2158 }
2159 if (PyObject_SetAttrString(error, "position", position) == -1) {
2160 Py_DECREF(error);
2161 Py_DECREF(position);
2162 return;
2163 }
2164 Py_DECREF(position);
2165
2166 PyErr_SetObject(elementtree_parseerror_obj, error);
2167 Py_DECREF(error);
2168}
2169
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002170/* -------------------------------------------------------------------- */
2171/* handlers */
2172
2173static void
2174expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2175 int data_len)
2176{
2177 PyObject* key;
2178 PyObject* value;
2179 PyObject* res;
2180
2181 if (data_len < 2 || data_in[0] != '&')
2182 return;
2183
Neal Norwitz0269b912007-08-08 06:56:02 +00002184 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002185 if (!key)
2186 return;
2187
2188 value = PyDict_GetItem(self->entity, key);
2189
2190 if (value) {
2191 if (TreeBuilder_CheckExact(self->target))
2192 res = treebuilder_handle_data(
2193 (TreeBuilderObject*) self->target, value
2194 );
2195 else if (self->handle_data)
2196 res = PyObject_CallFunction(self->handle_data, "O", value);
2197 else
2198 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002199 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002200 } else if (!PyErr_Occurred()) {
2201 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002202 char message[128] = "undefined entity ";
2203 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002204 expat_set_error(
2205 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002206 EXPAT(GetErrorLineNumber)(self->parser),
2207 EXPAT(GetErrorColumnNumber)(self->parser)
2208 );
2209 }
2210
2211 Py_DECREF(key);
2212}
2213
2214static void
2215expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2216 const XML_Char **attrib_in)
2217{
2218 PyObject* res;
2219 PyObject* tag;
2220 PyObject* attrib;
2221 int ok;
2222
2223 /* tag name */
2224 tag = makeuniversal(self, tag_in);
2225 if (!tag)
2226 return; /* parser will look for errors */
2227
2228 /* attributes */
2229 if (attrib_in[0]) {
2230 attrib = PyDict_New();
2231 if (!attrib)
2232 return;
2233 while (attrib_in[0] && attrib_in[1]) {
2234 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002235 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002236 if (!key || !value) {
2237 Py_XDECREF(value);
2238 Py_XDECREF(key);
2239 Py_DECREF(attrib);
2240 return;
2241 }
2242 ok = PyDict_SetItem(attrib, key, value);
2243 Py_DECREF(value);
2244 Py_DECREF(key);
2245 if (ok < 0) {
2246 Py_DECREF(attrib);
2247 return;
2248 }
2249 attrib_in += 2;
2250 }
2251 } else {
2252 Py_INCREF(Py_None);
2253 attrib = Py_None;
2254 }
2255
2256 if (TreeBuilder_CheckExact(self->target))
2257 /* shortcut */
2258 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2259 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002260 else if (self->handle_start) {
2261 if (attrib == Py_None) {
2262 Py_DECREF(attrib);
2263 attrib = PyDict_New();
2264 if (!attrib)
2265 return;
2266 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002267 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002268 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002269 res = NULL;
2270
2271 Py_DECREF(tag);
2272 Py_DECREF(attrib);
2273
2274 Py_XDECREF(res);
2275}
2276
2277static void
2278expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2279 int data_len)
2280{
2281 PyObject* data;
2282 PyObject* res;
2283
Neal Norwitz0269b912007-08-08 06:56:02 +00002284 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002285 if (!data)
2286 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002287
2288 if (TreeBuilder_CheckExact(self->target))
2289 /* shortcut */
2290 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2291 else if (self->handle_data)
2292 res = PyObject_CallFunction(self->handle_data, "O", data);
2293 else
2294 res = NULL;
2295
2296 Py_DECREF(data);
2297
2298 Py_XDECREF(res);
2299}
2300
2301static void
2302expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2303{
2304 PyObject* tag;
2305 PyObject* res = NULL;
2306
2307 if (TreeBuilder_CheckExact(self->target))
2308 /* shortcut */
2309 /* the standard tree builder doesn't look at the end tag */
2310 res = treebuilder_handle_end(
2311 (TreeBuilderObject*) self->target, Py_None
2312 );
2313 else if (self->handle_end) {
2314 tag = makeuniversal(self, tag_in);
2315 if (tag) {
2316 res = PyObject_CallFunction(self->handle_end, "O", tag);
2317 Py_DECREF(tag);
2318 }
2319 }
2320
2321 Py_XDECREF(res);
2322}
2323
2324static void
2325expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2326 const XML_Char *uri)
2327{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002328 PyObject* sprefix = NULL;
2329 PyObject* suri = NULL;
2330
2331 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2332 if (!suri)
2333 return;
2334
2335 if (prefix)
2336 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2337 else
2338 sprefix = PyUnicode_FromString("");
2339 if (!sprefix) {
2340 Py_DECREF(suri);
2341 return;
2342 }
2343
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002344 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002345 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002346 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002347
2348 Py_DECREF(sprefix);
2349 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002350}
2351
2352static void
2353expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2354{
2355 treebuilder_handle_namespace(
2356 (TreeBuilderObject*) self->target, 0, NULL, NULL
2357 );
2358}
2359
2360static void
2361expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2362{
2363 PyObject* comment;
2364 PyObject* res;
2365
2366 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002367 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002368 if (comment) {
2369 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2370 Py_XDECREF(res);
2371 Py_DECREF(comment);
2372 }
2373 }
2374}
2375
2376static void
2377expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2378 const XML_Char* data_in)
2379{
2380 PyObject* target;
2381 PyObject* data;
2382 PyObject* res;
2383
2384 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002385 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2386 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002387 if (target && data) {
2388 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2389 Py_XDECREF(res);
2390 Py_DECREF(data);
2391 Py_DECREF(target);
2392 } else {
2393 Py_XDECREF(data);
2394 Py_XDECREF(target);
2395 }
2396 }
2397}
2398
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002399static int
2400expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2401 XML_Encoding *info)
2402{
2403 PyObject* u;
2404 Py_UNICODE* p;
2405 unsigned char s[256];
2406 int i;
2407
2408 memset(info, 0, sizeof(XML_Encoding));
2409
2410 for (i = 0; i < 256; i++)
2411 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002412
Fredrik Lundhc3389992005-12-25 11:40:19 +00002413 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002414 if (!u)
2415 return XML_STATUS_ERROR;
2416
2417 if (PyUnicode_GET_SIZE(u) != 256) {
2418 Py_DECREF(u);
2419 return XML_STATUS_ERROR;
2420 }
2421
2422 p = PyUnicode_AS_UNICODE(u);
2423
2424 for (i = 0; i < 256; i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002425 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2426 info->map[i] = p[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002427 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002428 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002429 }
2430
2431 Py_DECREF(u);
2432
2433 return XML_STATUS_OK;
2434}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002435
2436/* -------------------------------------------------------------------- */
2437/* constructor and destructor */
2438
2439static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002440xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002441{
2442 XMLParserObject* self;
2443 /* FIXME: does this need to be static? */
2444 static XML_Memory_Handling_Suite memory_handler;
2445
2446 PyObject* target = NULL;
2447 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002448 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002449 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2450 &target, &encoding))
2451 return NULL;
2452
2453#if defined(USE_PYEXPAT_CAPI)
2454 if (!expat_capi) {
2455 PyErr_SetString(
2456 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2457 );
2458 return NULL;
2459 }
2460#endif
2461
2462 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2463 if (self == NULL)
2464 return NULL;
2465
2466 self->entity = PyDict_New();
2467 if (!self->entity) {
2468 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002469 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002470 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002471
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002472 self->names = PyDict_New();
2473 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002474 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002475 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002476 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002477 }
2478
2479 memory_handler.malloc_fcn = PyObject_Malloc;
2480 memory_handler.realloc_fcn = PyObject_Realloc;
2481 memory_handler.free_fcn = PyObject_Free;
2482
2483 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2484 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002485 PyObject_Del(self->names);
2486 PyObject_Del(self->entity);
2487 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002488 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002489 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002490 }
2491
2492 /* setup target handlers */
2493 if (!target) {
2494 target = treebuilder_new();
2495 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002496 EXPAT(ParserFree)(self->parser);
2497 PyObject_Del(self->names);
2498 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002499 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002500 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002501 }
2502 } else
2503 Py_INCREF(target);
2504 self->target = target;
2505
2506 self->handle_xml = PyObject_GetAttrString(target, "xml");
2507 self->handle_start = PyObject_GetAttrString(target, "start");
2508 self->handle_data = PyObject_GetAttrString(target, "data");
2509 self->handle_end = PyObject_GetAttrString(target, "end");
2510 self->handle_comment = PyObject_GetAttrString(target, "comment");
2511 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002512 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002513
2514 PyErr_Clear();
2515
2516 /* configure parser */
2517 EXPAT(SetUserData)(self->parser, self);
2518 EXPAT(SetElementHandler)(
2519 self->parser,
2520 (XML_StartElementHandler) expat_start_handler,
2521 (XML_EndElementHandler) expat_end_handler
2522 );
2523 EXPAT(SetDefaultHandlerExpand)(
2524 self->parser,
2525 (XML_DefaultHandler) expat_default_handler
2526 );
2527 EXPAT(SetCharacterDataHandler)(
2528 self->parser,
2529 (XML_CharacterDataHandler) expat_data_handler
2530 );
2531 if (self->handle_comment)
2532 EXPAT(SetCommentHandler)(
2533 self->parser,
2534 (XML_CommentHandler) expat_comment_handler
2535 );
2536 if (self->handle_pi)
2537 EXPAT(SetProcessingInstructionHandler)(
2538 self->parser,
2539 (XML_ProcessingInstructionHandler) expat_pi_handler
2540 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002541 EXPAT(SetUnknownEncodingHandler)(
2542 self->parser,
2543 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2544 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002545
2546 ALLOC(sizeof(XMLParserObject), "create expatparser");
2547
2548 return (PyObject*) self;
2549}
2550
2551static void
2552xmlparser_dealloc(XMLParserObject* self)
2553{
2554 EXPAT(ParserFree)(self->parser);
2555
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002556 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002557 Py_XDECREF(self->handle_pi);
2558 Py_XDECREF(self->handle_comment);
2559 Py_XDECREF(self->handle_end);
2560 Py_XDECREF(self->handle_data);
2561 Py_XDECREF(self->handle_start);
2562 Py_XDECREF(self->handle_xml);
2563
2564 Py_DECREF(self->target);
2565 Py_DECREF(self->entity);
2566 Py_DECREF(self->names);
2567
2568 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2569
2570 PyObject_Del(self);
2571}
2572
2573/* -------------------------------------------------------------------- */
2574/* methods (in alphabetical order) */
2575
2576LOCAL(PyObject*)
2577expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2578{
2579 int ok;
2580
2581 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2582
2583 if (PyErr_Occurred())
2584 return NULL;
2585
2586 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002587 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002588 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2589 EXPAT(GetErrorLineNumber)(self->parser),
2590 EXPAT(GetErrorColumnNumber)(self->parser)
2591 );
2592 return NULL;
2593 }
2594
2595 Py_RETURN_NONE;
2596}
2597
2598static PyObject*
2599xmlparser_close(XMLParserObject* self, PyObject* args)
2600{
2601 /* end feeding data to parser */
2602
2603 PyObject* res;
2604 if (!PyArg_ParseTuple(args, ":close"))
2605 return NULL;
2606
2607 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002608 if (!res)
2609 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002611 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002612 Py_DECREF(res);
2613 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002614 } if (self->handle_close) {
2615 Py_DECREF(res);
2616 return PyObject_CallFunction(self->handle_close, "");
2617 } else
2618 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002619}
2620
2621static PyObject*
2622xmlparser_feed(XMLParserObject* self, PyObject* args)
2623{
2624 /* feed data to parser */
2625
2626 char* data;
2627 int data_len;
2628 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2629 return NULL;
2630
2631 return expat_parse(self, data, data_len, 0);
2632}
2633
2634static PyObject*
2635xmlparser_parse(XMLParserObject* self, PyObject* args)
2636{
2637 /* (internal) parse until end of input stream */
2638
2639 PyObject* reader;
2640 PyObject* buffer;
2641 PyObject* res;
2642
2643 PyObject* fileobj;
2644 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2645 return NULL;
2646
2647 reader = PyObject_GetAttrString(fileobj, "read");
2648 if (!reader)
2649 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002650
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002651 /* read from open file object */
2652 for (;;) {
2653
2654 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2655
2656 if (!buffer) {
2657 /* read failed (e.g. due to KeyboardInterrupt) */
2658 Py_DECREF(reader);
2659 return NULL;
2660 }
2661
Christian Heimes72b710a2008-05-26 13:28:38 +00002662 if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663 Py_DECREF(buffer);
2664 break;
2665 }
2666
2667 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002668 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002669 );
2670
2671 Py_DECREF(buffer);
2672
2673 if (!res) {
2674 Py_DECREF(reader);
2675 return NULL;
2676 }
2677 Py_DECREF(res);
2678
2679 }
2680
2681 Py_DECREF(reader);
2682
2683 res = expat_parse(self, "", 0, 1);
2684
2685 if (res && TreeBuilder_CheckExact(self->target)) {
2686 Py_DECREF(res);
2687 return treebuilder_done((TreeBuilderObject*) self->target);
2688 }
2689
2690 return res;
2691}
2692
2693static PyObject*
2694xmlparser_setevents(XMLParserObject* self, PyObject* args)
2695{
2696 /* activate element event reporting */
2697
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002698 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002699 TreeBuilderObject* target;
2700
2701 PyObject* events; /* event collector */
2702 PyObject* event_set = Py_None;
2703 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2704 &event_set))
2705 return NULL;
2706
2707 if (!TreeBuilder_CheckExact(self->target)) {
2708 PyErr_SetString(
2709 PyExc_TypeError,
2710 "event handling only supported for cElementTree.Treebuilder "
2711 "targets"
2712 );
2713 return NULL;
2714 }
2715
2716 target = (TreeBuilderObject*) self->target;
2717
2718 Py_INCREF(events);
2719 Py_XDECREF(target->events);
2720 target->events = events;
2721
2722 /* clear out existing events */
2723 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2724 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2725 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2726 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2727
2728 if (event_set == Py_None) {
2729 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002730 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731 Py_RETURN_NONE;
2732 }
2733
2734 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2735 goto error;
2736
2737 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2738 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2739 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002740 if (PyUnicode_Check(item)) {
2741 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002742 if (event == NULL)
2743 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002744 } else if (PyBytes_Check(item))
2745 event = PyBytes_AS_STRING(item);
2746 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002747 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002748 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002749 if (strcmp(event, "start") == 0) {
2750 Py_INCREF(item);
2751 target->start_event_obj = item;
2752 } else if (strcmp(event, "end") == 0) {
2753 Py_INCREF(item);
2754 Py_XDECREF(target->end_event_obj);
2755 target->end_event_obj = item;
2756 } else if (strcmp(event, "start-ns") == 0) {
2757 Py_INCREF(item);
2758 Py_XDECREF(target->start_ns_event_obj);
2759 target->start_ns_event_obj = item;
2760 EXPAT(SetNamespaceDeclHandler)(
2761 self->parser,
2762 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2763 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2764 );
2765 } else if (strcmp(event, "end-ns") == 0) {
2766 Py_INCREF(item);
2767 Py_XDECREF(target->end_ns_event_obj);
2768 target->end_ns_event_obj = item;
2769 EXPAT(SetNamespaceDeclHandler)(
2770 self->parser,
2771 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2772 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2773 );
2774 } else {
2775 PyErr_Format(
2776 PyExc_ValueError,
2777 "unknown event '%s'", event
2778 );
2779 return NULL;
2780 }
2781 }
2782
2783 Py_RETURN_NONE;
2784
2785 error:
2786 PyErr_SetString(
2787 PyExc_TypeError,
2788 "invalid event tuple"
2789 );
2790 return NULL;
2791}
2792
2793static PyMethodDef xmlparser_methods[] = {
2794 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2795 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2796 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2797 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2798 {NULL, NULL}
2799};
2800
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002801static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002802xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002803{
Alexander Belopolskye239d232010-12-08 23:31:48 +00002804 if (PyUnicode_Check(nameobj)) {
2805 PyObject* res;
2806 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
2807 res = self->entity;
2808 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
2809 res = self->target;
2810 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
2811 return PyUnicode_FromFormat(
2812 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002813 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00002814 }
2815 else
2816 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002817
Alexander Belopolskye239d232010-12-08 23:31:48 +00002818 Py_INCREF(res);
2819 return res;
2820 }
2821 generic:
2822 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002823}
2824
Neal Norwitz227b5332006-03-22 09:28:35 +00002825static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002826 PyVarObject_HEAD_INIT(NULL, 0)
2827 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002828 /* methods */
2829 (destructor)xmlparser_dealloc, /* tp_dealloc */
2830 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002831 0, /* tp_getattr */
2832 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002833 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002834 0, /* tp_repr */
2835 0, /* tp_as_number */
2836 0, /* tp_as_sequence */
2837 0, /* tp_as_mapping */
2838 0, /* tp_hash */
2839 0, /* tp_call */
2840 0, /* tp_str */
2841 (getattrofunc)xmlparser_getattro, /* tp_getattro */
2842 0, /* tp_setattro */
2843 0, /* tp_as_buffer */
2844 Py_TPFLAGS_DEFAULT, /* tp_flags */
2845 0, /* tp_doc */
2846 0, /* tp_traverse */
2847 0, /* tp_clear */
2848 0, /* tp_richcompare */
2849 0, /* tp_weaklistoffset */
2850 0, /* tp_iter */
2851 0, /* tp_iternext */
2852 xmlparser_methods, /* tp_methods */
2853 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002854};
2855
2856#endif
2857
2858/* ==================================================================== */
2859/* python module interface */
2860
2861static PyMethodDef _functions[] = {
2862 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2863 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2864 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2865#if defined(USE_EXPAT)
2866 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2867 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2868#endif
2869 {NULL, NULL}
2870};
2871
Martin v. Löwis1a214512008-06-11 05:26:20 +00002872
2873static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002874 PyModuleDef_HEAD_INIT,
2875 "_elementtree",
2876 NULL,
2877 -1,
2878 _functions,
2879 NULL,
2880 NULL,
2881 NULL,
2882 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002883};
2884
Neal Norwitzf6657e62006-12-28 04:47:50 +00002885PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002886PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002887{
2888 PyObject* m;
2889 PyObject* g;
2890 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002891
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002892 /* Initialize object types */
2893 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002894 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002895 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002896 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002897#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002898 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002899 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002900#endif
2901
Martin v. Löwis1a214512008-06-11 05:26:20 +00002902 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002903 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002904 return NULL;
2905
2906 /* The code below requires that the module gets already added
2907 to sys.modules. */
2908 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002909 _elementtreemodule.m_name,
2910 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002911
2912 /* python glue code */
2913
2914 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002915 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002916 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002917
2918 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2919
2920 bootstrap = (
2921
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002922 "from copy import copy, deepcopy\n"
2923
2924 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002925 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002926 "except ImportError:\n"
2927 " import ElementTree\n"
2928 "ET = ElementTree\n"
2929 "del ElementTree\n"
2930
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002931 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002932
2933 "try:\n" /* check if copy works as is */
2934 " copy(cElementTree.Element('x'))\n"
2935 "except:\n"
2936 " def copyelement(elem):\n"
2937 " return elem\n"
2938
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002939 "class CommentProxy:\n"
2940 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002941 " element = cElementTree.Element(ET.Comment)\n"
2942 " element.text = text\n"
2943 " return element\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002944 " def __eq__(self, other):\n"
2945 " return ET.Comment == other\n"
2946 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002947
2948 "class ElementTree(ET.ElementTree):\n" /* public */
2949 " def parse(self, source, parser=None):\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00002950 " close_source = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002951 " if not hasattr(source, 'read'):\n"
2952 " source = open(source, 'rb')\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00002953 " close_source = True\n"
2954 " try:\n"
2955 " if parser is not None:\n"
2956 " while 1:\n"
2957 " data = source.read(65536)\n"
2958 " if not data:\n"
2959 " break\n"
2960 " parser.feed(data)\n"
2961 " self._root = parser.close()\n"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002962 " else:\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00002963 " parser = cElementTree.XMLParser()\n"
2964 " self._root = parser._parse(source)\n"
2965 " return self._root\n"
2966 " finally:\n"
2967 " if close_source:\n"
2968 " source.close()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002969 "cElementTree.ElementTree = ElementTree\n"
2970
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002971 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002972 " if tag == '*':\n"
2973 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 " if tag is None or node.tag == tag:\n"
2975 " yield node\n"
2976 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002977 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002978 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002979
2980 "def itertext(node):\n" /* helper */
2981 " if node.text:\n"
2982 " yield node.text\n"
2983 " for e in node:\n"
2984 " for s in e.itertext():\n"
2985 " yield s\n"
2986 " if e.tail:\n"
2987 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002988
2989 "def parse(source, parser=None):\n" /* public */
2990 " tree = ElementTree()\n"
2991 " tree.parse(source, parser)\n"
2992 " return tree\n"
2993 "cElementTree.parse = parse\n"
2994
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002995 "class iterparse:\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002996 " root = None\n"
2997 " def __init__(self, file, events=None):\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00002998 " self._close_file = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002999 " if not hasattr(file, 'read'):\n"
3000 " file = open(file, 'rb')\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00003001 " self._close_file = True\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003002 " self._file = file\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003003 " self._events = []\n"
3004 " self._index = 0\n"
3005 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003006 " b = cElementTree.TreeBuilder()\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003007 " self._parser = cElementTree.XMLParser(b)\n"
3008 " self._parser._setevents(self._events, events)\n"
3009 " def __next__(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003010 " while 1:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003011 " try:\n"
3012 " item = self._events[self._index]\n"
3013 " except IndexError:\n"
3014 " if self._parser is None:\n"
3015 " self.root = self._root\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00003016 " if self._close_file:\n"
3017 " self._file.close()\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003018 " raise StopIteration\n"
3019 " # load event buffer\n"
3020 " del self._events[:]\n"
3021 " self._index = 0\n"
3022 " data = self._file.read(16384)\n"
3023 " if data:\n"
3024 " self._parser.feed(data)\n"
3025 " else:\n"
3026 " self._root = self._parser.close()\n"
3027 " self._parser = None\n"
3028 " else:\n"
3029 " self._index = self._index + 1\n"
3030 " return item\n"
3031 " def __iter__(self):\n"
3032 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003033 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003034
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003035 "class PIProxy:\n"
3036 " def __call__(self, target, text=None):\n"
3037 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003038 " element.text = target\n"
3039 " if text:\n"
3040 " element.text = element.text + ' ' + text\n"
3041 " return element\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003042 " def __eq__(self, other):\n"
3043 " return ET.PI == other\n"
3044 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003045
3046 "def XML(text):\n" /* public */
3047 " parser = cElementTree.XMLParser()\n"
3048 " parser.feed(text)\n"
3049 " return parser.close()\n"
3050 "cElementTree.XML = cElementTree.fromstring = XML\n"
3051
3052 "def XMLID(text):\n" /* public */
3053 " tree = XML(text)\n"
3054 " ids = {}\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003055 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003056 " id = elem.get('id')\n"
3057 " if id:\n"
3058 " ids[id] = elem\n"
3059 " return tree, ids\n"
3060 "cElementTree.XMLID = XMLID\n"
3061
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003062 "try:\n"
3063 " register_namespace = ET.register_namespace\n"
3064 "except AttributeError:\n"
3065 " def register_namespace(prefix, uri):\n"
3066 " ET._namespace_map[uri] = prefix\n"
3067 "cElementTree.register_namespace = register_namespace\n"
3068
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003069 "cElementTree.dump = ET.dump\n"
3070 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3071 "cElementTree.iselement = ET.iselement\n"
3072 "cElementTree.QName = ET.QName\n"
3073 "cElementTree.tostring = ET.tostring\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003074 "cElementTree.fromstringlist = ET.fromstringlist\n"
3075 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076 "cElementTree.VERSION = '" VERSION "'\n"
3077 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003078
3079 );
3080
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003081 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3082 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003083
3084 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3085
3086 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3087 if (elementtree_copyelement_obj) {
3088 /* reduce hack needed; enable reduce method */
3089 PyMethodDef* mp;
3090 for (mp = element_methods; mp->ml_name; mp++)
3091 if (mp->ml_meth == (PyCFunction) element_reduce) {
3092 mp->ml_name = "__reduce__";
3093 break;
3094 }
3095 } else
3096 PyErr_Clear();
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003097
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003098 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003099 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3100 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003101
3102#if defined(USE_PYEXPAT_CAPI)
3103 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003104 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3105 if (expat_capi) {
3106 /* check that it's usable */
3107 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3108 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3109 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3110 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3111 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3112 expat_capi = NULL;
3113 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003114#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003115
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003116 elementtree_parseerror_obj = PyErr_NewException(
3117 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3118 );
3119 Py_INCREF(elementtree_parseerror_obj);
3120 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3121
3122 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003123}