blob: 0c64dd5d4b87c570a51c0578b3a77169af959a78 [file] [log] [blame]
/*
 * ElementTree
 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
 *
 * elementtree accelerator
 *
 * History:
 * 1999-06-20 fl  created (as part of sgmlop)
 * 2001-05-29 fl  effdom edition
 * 2003-02-27 fl  elementtree edition (alpha)
 * 2004-06-03 fl  updates for elementtree 1.2
 * 2005-01-05 fl  major optimization effort
 * 2005-01-11 fl  first public release (cElementTree 0.8)
 * 2005-01-12 fl  split element object into base and extras
 * 2005-01-13 fl  use tagged pointers for tail/text (cElementTree 0.9)
 * 2005-01-17 fl  added treebuilder close method
 * 2005-01-17 fl  fixed crash in getchildren
 * 2005-01-18 fl  removed observer api, added iterparse (cElementTree 0.9.3)
 * 2005-01-23 fl  revised iterparse api; added namespace event support (0.9.8)
 * 2005-01-26 fl  added VERSION module property (cElementTree 1.0)
 * 2005-01-28 fl  added remove method (1.0.1)
 * 2005-03-01 fl  added iselement function; fixed makeelement aliasing (1.0.2)
 * 2005-03-13 fl  export Comment and ProcessingInstruction/PI helpers
 * 2005-03-26 fl  added Comment and PI support to XMLParser
 * 2005-03-27 fl  event optimizations; complain about bogus events
 * 2005-08-08 fl  fixed read error handling in parse
 * 2005-08-11 fl  added runtime test for copy workaround (1.0.3)
 * 2005-12-13 fl  added expat_capi support (for xml.etree) (1.0.4)
 * 2005-12-16 fl  added support for non-standard encodings
 * 2006-03-08 fl  fixed a couple of potential null-refs and leaks
 * 2006-03-12 fl  merge in 2.5 ssize_t changes
 * 2007-08-25 fl  call custom builder's close method from XMLParser
 * 2007-08-31 fl  added iter, extend from ET 1.3
 * 2007-09-01 fl  fixed ParseError exception, setslice source type, etc
 * 2007-09-03 fl  fixed handling of negative insert indexes
 * 2007-09-04 fl  added itertext from ET 1.3
 * 2007-09-06 fl  added position attribute to ParseError exception
 * 2008-06-06 fl  delay error reporting in iterparse (from Hrvoje Niksic)
 *
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000052#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000053
54/* -------------------------------------------------------------------- */
55/* configuration */
56
57/* Leave defined to include the expat-based XMLParser type */
58#define USE_EXPAT
59
Florent Xiclunaf15351d2010-03-13 23:24:31 +000060/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* #define USE_PYEXPAT_CAPI */
62
63/* An element can hold this many children without extra memory
64 allocations. */
65#define STATIC_CHILDREN 4
66
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
73 eight bytes. For the current version of cElementTree, this means
74 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
/* optional allocation tracing: change the condition below to 1 to keep
   a running byte count in 'memory' and print it, together with the
   given comment, each time ALLOC or RELEASE is used.  when disabled,
   both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
89
/* compiler tweaks: LOCAL(type) declares a file-private helper that
   returns 'type'.  under MSVC the helper is additionally marked
   __inline and __fastcall; everywhere else it is plain static. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
96
/* compatibility macros: supply names used by this file when building
   against an interpreter that is too old to define them itself */

/* interpreters older than 2.6: object header accessors */
#if (PY_VERSION_HEX < 0x02060000)
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* interpreters older than 2.5: sizes are plain ints */
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

/* interpreters older than 2.4: no exact dict check, and possibly no
   Py_RETURN_NONE */
#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check

#if !defined(Py_RETURN_NONE)
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
#endif
115
/* the text and tail slots of an element are tagged pointers: the least
   significant bit holds a 'join' flag and the remaining bits hold the
   object pointer.  never use such a slot as an object pointer without
   passing it through JOIN_OBJ first.  see the comments in the
   ElementObject definition for what the flag means.

   JOIN_GET(p)        -> the flag bit of p (0 or 1)
   JOIN_OBJ(p)        -> p with the flag bit cleared, as a PyObject*
   JOIN_SET(p, flag)  -> the object pointer of p with 'flag' or:ed in */
#define JOIN_GET(p) (((Py_uintptr_t) (p)) & (Py_uintptr_t) 1)
#define JOIN_SET(p, flag) ((void*) (((Py_uintptr_t) (JOIN_OBJ(p))) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) (((Py_uintptr_t) (p)) & ~(Py_uintptr_t) 1))
123
124/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000125static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000128static PyObject* elementtree_iter_obj;
129static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130static PyObject* elementpath_obj;
131
132/* helpers */
133
134LOCAL(PyObject*)
135deepcopy(PyObject* object, PyObject* memo)
136{
137 /* do a deep copy of the given object */
138
139 PyObject* args;
140 PyObject* result;
141
142 if (!elementtree_deepcopy_obj) {
143 PyErr_SetString(
144 PyExc_RuntimeError,
145 "deepcopy helper not found"
146 );
147 return NULL;
148 }
149
150 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000151 if (!args)
152 return NULL;
153
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
155 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156
157 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158
159 Py_DECREF(args);
160
161 return result;
162}
163
164LOCAL(PyObject*)
165list_join(PyObject* list)
166{
167 /* join list elements (destroying the list in the process) */
168
169 PyObject* joiner;
170 PyObject* function;
171 PyObject* args;
172 PyObject* result;
173
174 switch (PyList_GET_SIZE(list)) {
175 case 0:
176 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000177 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000178 case 1:
179 result = PyList_GET_ITEM(list, 0);
180 Py_INCREF(result);
181 Py_DECREF(list);
182 return result;
183 }
184
185 /* two or more elements: slice out a suitable separator from the
186 first member, and use that to join the entire list */
187
188 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
189 if (!joiner)
190 return NULL;
191
192 function = PyObject_GetAttrString(joiner, "join");
193 if (!function) {
194 Py_DECREF(joiner);
195 return NULL;
196 }
197
198 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000199 if (!args)
200 return NULL;
201
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202 PyTuple_SET_ITEM(args, 0, list);
203
204 result = PyObject_CallObject(function, args);
205
206 Py_DECREF(args); /* also removes list */
207 Py_DECREF(function);
208 Py_DECREF(joiner);
209
210 return result;
211}
212
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000213/* -------------------------------------------------------------------- */
214/* the element type */
215
216typedef struct {
217
218 /* attributes (a dictionary object), or None if no attributes */
219 PyObject* attrib;
220
221 /* child elements */
222 int length; /* actual number of items */
223 int allocated; /* allocated items */
224
225 /* this either points to _children or to a malloced buffer */
226 PyObject* *children;
227
228 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100229
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230} ElementObjectExtra;
231
232typedef struct {
233 PyObject_HEAD
234
235 /* element tag (a string). */
236 PyObject* tag;
237
238 /* text before first child. note that this is a tagged pointer;
239 use JOIN_OBJ to get the object pointer. the join flag is used
240 to distinguish lists created by the tree builder from lists
241 assigned to the attribute by application code; the former
242 should be joined before being returned to the user, the latter
243 should be left intact. */
244 PyObject* text;
245
246 /* text after this element, in parent. note that this is a tagged
247 pointer; use JOIN_OBJ to get the object pointer. */
248 PyObject* tail;
249
250 ElementObjectExtra* extra;
251
252} ElementObject;
253
Neal Norwitz227b5332006-03-22 09:28:35 +0000254static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000255
Christian Heimes90aa7642007-12-19 02:45:37 +0000256#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000257
258/* -------------------------------------------------------------------- */
259/* element constructor and destructor */
260
261LOCAL(int)
262element_new_extra(ElementObject* self, PyObject* attrib)
263{
264 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
265 if (!self->extra)
266 return -1;
267
268 if (!attrib)
269 attrib = Py_None;
270
271 Py_INCREF(attrib);
272 self->extra->attrib = attrib;
273
274 self->extra->length = 0;
275 self->extra->allocated = STATIC_CHILDREN;
276 self->extra->children = self->extra->_children;
277
278 return 0;
279}
280
281LOCAL(void)
282element_dealloc_extra(ElementObject* self)
283{
284 int i;
285
286 Py_DECREF(self->extra->attrib);
287
288 for (i = 0; i < self->extra->length; i++)
289 Py_DECREF(self->extra->children[i]);
290
291 if (self->extra->children != self->extra->_children)
292 PyObject_Free(self->extra->children);
293
294 PyObject_Free(self->extra);
295}
296
297LOCAL(PyObject*)
298element_new(PyObject* tag, PyObject* attrib)
299{
300 ElementObject* self;
301
302 self = PyObject_New(ElementObject, &Element_Type);
303 if (self == NULL)
304 return NULL;
305
306 /* use None for empty dictionaries */
307 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
308 attrib = Py_None;
309
310 self->extra = NULL;
311
312 if (attrib != Py_None) {
313
Thomas Wouters477c8d52006-05-27 19:21:47 +0000314 if (element_new_extra(self, attrib) < 0) {
315 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000316 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000317 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000318
319 self->extra->length = 0;
320 self->extra->allocated = STATIC_CHILDREN;
321 self->extra->children = self->extra->_children;
322
323 }
324
325 Py_INCREF(tag);
326 self->tag = tag;
327
328 Py_INCREF(Py_None);
329 self->text = Py_None;
330
331 Py_INCREF(Py_None);
332 self->tail = Py_None;
333
334 ALLOC(sizeof(ElementObject), "create element");
335
336 return (PyObject*) self;
337}
338
339LOCAL(int)
340element_resize(ElementObject* self, int extra)
341{
342 int size;
343 PyObject* *children;
344
345 /* make sure self->children can hold the given number of extra
346 elements. set an exception and return -1 if allocation failed */
347
348 if (!self->extra)
349 element_new_extra(self, NULL);
350
351 size = self->extra->length + extra;
352
353 if (size > self->extra->allocated) {
354 /* use Python 2.4's list growth strategy */
355 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000356 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100357 * which needs at least 4 bytes.
358 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000359 * be safe.
360 */
361 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000362 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000363 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100364 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000365 * false alarm always assume at least one child to be safe.
366 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000367 children = PyObject_Realloc(self->extra->children,
368 size * sizeof(PyObject*));
369 if (!children)
370 goto nomemory;
371 } else {
372 children = PyObject_Malloc(size * sizeof(PyObject*));
373 if (!children)
374 goto nomemory;
375 /* copy existing children from static area to malloc buffer */
376 memcpy(children, self->extra->children,
377 self->extra->length * sizeof(PyObject*));
378 }
379 self->extra->children = children;
380 self->extra->allocated = size;
381 }
382
383 return 0;
384
385 nomemory:
386 PyErr_NoMemory();
387 return -1;
388}
389
390LOCAL(int)
391element_add_subelement(ElementObject* self, PyObject* element)
392{
393 /* add a child element to a parent */
394
395 if (element_resize(self, 1) < 0)
396 return -1;
397
398 Py_INCREF(element);
399 self->extra->children[self->extra->length] = element;
400
401 self->extra->length++;
402
403 return 0;
404}
405
406LOCAL(PyObject*)
407element_get_attrib(ElementObject* self)
408{
409 /* return borrowed reference to attrib dictionary */
410 /* note: this function assumes that the extra section exists */
411
412 PyObject* res = self->extra->attrib;
413
414 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000415 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000416 /* create missing dictionary */
417 res = PyDict_New();
418 if (!res)
419 return NULL;
420 self->extra->attrib = res;
421 }
422
423 return res;
424}
425
426LOCAL(PyObject*)
427element_get_text(ElementObject* self)
428{
429 /* return borrowed reference to text attribute */
430
431 PyObject* res = self->text;
432
433 if (JOIN_GET(res)) {
434 res = JOIN_OBJ(res);
435 if (PyList_CheckExact(res)) {
436 res = list_join(res);
437 if (!res)
438 return NULL;
439 self->text = res;
440 }
441 }
442
443 return res;
444}
445
446LOCAL(PyObject*)
447element_get_tail(ElementObject* self)
448{
449 /* return borrowed reference to text attribute */
450
451 PyObject* res = self->tail;
452
453 if (JOIN_GET(res)) {
454 res = JOIN_OBJ(res);
455 if (PyList_CheckExact(res)) {
456 res = list_join(res);
457 if (!res)
458 return NULL;
459 self->tail = res;
460 }
461 }
462
463 return res;
464}
465
466static PyObject*
467element(PyObject* self, PyObject* args, PyObject* kw)
468{
469 PyObject* elem;
470
471 PyObject* tag;
472 PyObject* attrib = NULL;
473 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
474 &PyDict_Type, &attrib))
475 return NULL;
476
477 if (attrib || kw) {
478 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
479 if (!attrib)
480 return NULL;
481 if (kw)
482 PyDict_Update(attrib, kw);
483 } else {
484 Py_INCREF(Py_None);
485 attrib = Py_None;
486 }
487
488 elem = element_new(tag, attrib);
489
490 Py_DECREF(attrib);
491
492 return elem;
493}
494
495static PyObject*
496subelement(PyObject* self, PyObject* args, PyObject* kw)
497{
498 PyObject* elem;
499
500 ElementObject* parent;
501 PyObject* tag;
502 PyObject* attrib = NULL;
503 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
504 &Element_Type, &parent, &tag,
505 &PyDict_Type, &attrib))
506 return NULL;
507
508 if (attrib || kw) {
509 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
510 if (!attrib)
511 return NULL;
512 if (kw)
513 PyDict_Update(attrib, kw);
514 } else {
515 Py_INCREF(Py_None);
516 attrib = Py_None;
517 }
518
519 elem = element_new(tag, attrib);
520
521 Py_DECREF(attrib);
522
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000523 if (element_add_subelement(parent, elem) < 0) {
524 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000525 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000526 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000527
528 return elem;
529}
530
531static void
532element_dealloc(ElementObject* self)
533{
534 if (self->extra)
535 element_dealloc_extra(self);
536
537 /* discard attributes */
538 Py_DECREF(self->tag);
539 Py_DECREF(JOIN_OBJ(self->text));
540 Py_DECREF(JOIN_OBJ(self->tail));
541
542 RELEASE(sizeof(ElementObject), "destroy element");
543
544 PyObject_Del(self);
545}
546
547/* -------------------------------------------------------------------- */
548/* methods (in alphabetical order) */
549
550static PyObject*
551element_append(ElementObject* self, PyObject* args)
552{
553 PyObject* element;
554 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
555 return NULL;
556
557 if (element_add_subelement(self, element) < 0)
558 return NULL;
559
560 Py_RETURN_NONE;
561}
562
563static PyObject*
564element_clear(ElementObject* self, PyObject* args)
565{
566 if (!PyArg_ParseTuple(args, ":clear"))
567 return NULL;
568
569 if (self->extra) {
570 element_dealloc_extra(self);
571 self->extra = NULL;
572 }
573
574 Py_INCREF(Py_None);
575 Py_DECREF(JOIN_OBJ(self->text));
576 self->text = Py_None;
577
578 Py_INCREF(Py_None);
579 Py_DECREF(JOIN_OBJ(self->tail));
580 self->tail = Py_None;
581
582 Py_RETURN_NONE;
583}
584
585static PyObject*
586element_copy(ElementObject* self, PyObject* args)
587{
588 int i;
589 ElementObject* element;
590
591 if (!PyArg_ParseTuple(args, ":__copy__"))
592 return NULL;
593
594 element = (ElementObject*) element_new(
595 self->tag, (self->extra) ? self->extra->attrib : Py_None
596 );
597 if (!element)
598 return NULL;
599
600 Py_DECREF(JOIN_OBJ(element->text));
601 element->text = self->text;
602 Py_INCREF(JOIN_OBJ(element->text));
603
604 Py_DECREF(JOIN_OBJ(element->tail));
605 element->tail = self->tail;
606 Py_INCREF(JOIN_OBJ(element->tail));
607
608 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100609
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000610 if (element_resize(element, self->extra->length) < 0) {
611 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000612 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000613 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614
615 for (i = 0; i < self->extra->length; i++) {
616 Py_INCREF(self->extra->children[i]);
617 element->extra->children[i] = self->extra->children[i];
618 }
619
620 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100621
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000622 }
623
624 return (PyObject*) element;
625}
626
627static PyObject*
628element_deepcopy(ElementObject* self, PyObject* args)
629{
630 int i;
631 ElementObject* element;
632 PyObject* tag;
633 PyObject* attrib;
634 PyObject* text;
635 PyObject* tail;
636 PyObject* id;
637
638 PyObject* memo;
639 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
640 return NULL;
641
642 tag = deepcopy(self->tag, memo);
643 if (!tag)
644 return NULL;
645
646 if (self->extra) {
647 attrib = deepcopy(self->extra->attrib, memo);
648 if (!attrib) {
649 Py_DECREF(tag);
650 return NULL;
651 }
652 } else {
653 Py_INCREF(Py_None);
654 attrib = Py_None;
655 }
656
657 element = (ElementObject*) element_new(tag, attrib);
658
659 Py_DECREF(tag);
660 Py_DECREF(attrib);
661
662 if (!element)
663 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100664
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665 text = deepcopy(JOIN_OBJ(self->text), memo);
666 if (!text)
667 goto error;
668 Py_DECREF(element->text);
669 element->text = JOIN_SET(text, JOIN_GET(self->text));
670
671 tail = deepcopy(JOIN_OBJ(self->tail), memo);
672 if (!tail)
673 goto error;
674 Py_DECREF(element->tail);
675 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
676
677 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100678
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000679 if (element_resize(element, self->extra->length) < 0)
680 goto error;
681
682 for (i = 0; i < self->extra->length; i++) {
683 PyObject* child = deepcopy(self->extra->children[i], memo);
684 if (!child) {
685 element->extra->length = i;
686 goto error;
687 }
688 element->extra->children[i] = child;
689 }
690
691 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100692
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693 }
694
695 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000696 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000697 if (!id)
698 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699
700 i = PyDict_SetItem(memo, id, (PyObject*) element);
701
702 Py_DECREF(id);
703
704 if (i < 0)
705 goto error;
706
707 return (PyObject*) element;
708
709 error:
710 Py_DECREF(element);
711 return NULL;
712}
713
714LOCAL(int)
715checkpath(PyObject* tag)
716{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000717 Py_ssize_t i;
718 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 /* check if a tag contains an xpath character */
721
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000722#define PATHCHAR(ch) \
723 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000724
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200726 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
727 void *data = PyUnicode_DATA(tag);
728 unsigned int kind = PyUnicode_KIND(tag);
729 for (i = 0; i < len; i++) {
730 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
731 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000732 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200733 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000734 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200735 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000736 return 1;
737 }
738 return 0;
739 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000740 if (PyBytes_Check(tag)) {
741 char *p = PyBytes_AS_STRING(tag);
742 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743 if (p[i] == '{')
744 check = 0;
745 else if (p[i] == '}')
746 check = 1;
747 else if (check && PATHCHAR(p[i]))
748 return 1;
749 }
750 return 0;
751 }
752
753 return 1; /* unknown type; might be path expression */
754}
755
756static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000757element_extend(ElementObject* self, PyObject* args)
758{
759 PyObject* seq;
760 Py_ssize_t i, seqlen = 0;
761
762 PyObject* seq_in;
763 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
764 return NULL;
765
766 seq = PySequence_Fast(seq_in, "");
767 if (!seq) {
768 PyErr_Format(
769 PyExc_TypeError,
770 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
771 );
772 return NULL;
773 }
774
775 seqlen = PySequence_Size(seq);
776 for (i = 0; i < seqlen; i++) {
777 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
778 if (element_add_subelement(self, element) < 0) {
779 Py_DECREF(seq);
780 return NULL;
781 }
782 }
783
784 Py_DECREF(seq);
785
786 Py_RETURN_NONE;
787}
788
789static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000790element_find(ElementObject* self, PyObject* args)
791{
792 int i;
793
794 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000795 PyObject* namespaces = Py_None;
796 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000797 return NULL;
798
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000799 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000800 return PyObject_CallMethod(
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000801 elementpath_obj, "find", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000802 );
803
804 if (!self->extra)
805 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100806
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807 for (i = 0; i < self->extra->length; i++) {
808 PyObject* item = self->extra->children[i];
809 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000810 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000811 Py_INCREF(item);
812 return item;
813 }
814 }
815
816 Py_RETURN_NONE;
817}
818
819static PyObject*
820element_findtext(ElementObject* self, PyObject* args)
821{
822 int i;
823
824 PyObject* tag;
825 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000826 PyObject* namespaces = Py_None;
827 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000828 return NULL;
829
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000830 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000831 return PyObject_CallMethod(
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000832 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000833 );
834
835 if (!self->extra) {
836 Py_INCREF(default_value);
837 return default_value;
838 }
839
840 for (i = 0; i < self->extra->length; i++) {
841 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000842 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
843
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000844 PyObject* text = element_get_text(item);
845 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000846 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000847 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000848 return text;
849 }
850 }
851
852 Py_INCREF(default_value);
853 return default_value;
854}
855
856static PyObject*
857element_findall(ElementObject* self, PyObject* args)
858{
859 int i;
860 PyObject* out;
861
862 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000863 PyObject* namespaces = Py_None;
864 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000865 return NULL;
866
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000867 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000868 return PyObject_CallMethod(
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000869 elementpath_obj, "findall", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000870 );
871
872 out = PyList_New(0);
873 if (!out)
874 return NULL;
875
876 if (!self->extra)
877 return out;
878
879 for (i = 0; i < self->extra->length; i++) {
880 PyObject* item = self->extra->children[i];
881 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000882 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000883 if (PyList_Append(out, item) < 0) {
884 Py_DECREF(out);
885 return NULL;
886 }
887 }
888 }
889
890 return out;
891}
892
893static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000894element_iterfind(ElementObject* self, PyObject* args)
895{
896 PyObject* tag;
897 PyObject* namespaces = Py_None;
898 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
899 return NULL;
900
901 return PyObject_CallMethod(
902 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
903 );
904}
905
906static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000907element_get(ElementObject* self, PyObject* args)
908{
909 PyObject* value;
910
911 PyObject* key;
912 PyObject* default_value = Py_None;
913 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
914 return NULL;
915
916 if (!self->extra || self->extra->attrib == Py_None)
917 value = default_value;
918 else {
919 value = PyDict_GetItem(self->extra->attrib, key);
920 if (!value)
921 value = default_value;
922 }
923
924 Py_INCREF(value);
925 return value;
926}
927
928static PyObject*
929element_getchildren(ElementObject* self, PyObject* args)
930{
931 int i;
932 PyObject* list;
933
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000934 /* FIXME: report as deprecated? */
935
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000936 if (!PyArg_ParseTuple(args, ":getchildren"))
937 return NULL;
938
939 if (!self->extra)
940 return PyList_New(0);
941
942 list = PyList_New(self->extra->length);
943 if (!list)
944 return NULL;
945
946 for (i = 0; i < self->extra->length; i++) {
947 PyObject* item = self->extra->children[i];
948 Py_INCREF(item);
949 PyList_SET_ITEM(list, i, item);
950 }
951
952 return list;
953}
954
955static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000956element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000957{
958 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100959
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000960 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000961 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000962 return NULL;
963
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000964 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000965 PyErr_SetString(
966 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000967 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000968 );
969 return NULL;
970 }
971
972 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000973 if (!args)
974 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000975
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000976 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
977 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
978
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000979 result = PyObject_CallObject(elementtree_iter_obj, args);
980
981 Py_DECREF(args);
982
983 return result;
984}
985
986
987static PyObject*
988element_itertext(ElementObject* self, PyObject* args)
989{
990 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100991
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000992 if (!PyArg_ParseTuple(args, ":itertext"))
993 return NULL;
994
995 if (!elementtree_itertext_obj) {
996 PyErr_SetString(
997 PyExc_RuntimeError,
998 "itertext helper not found"
999 );
1000 return NULL;
1001 }
1002
1003 args = PyTuple_New(1);
1004 if (!args)
1005 return NULL;
1006
1007 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1008
1009 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001010
1011 Py_DECREF(args);
1012
1013 return result;
1014}
1015
1016static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001017element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001018{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001019 ElementObject* self = (ElementObject*) self_;
1020
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001021 if (!self->extra || index < 0 || index >= self->extra->length) {
1022 PyErr_SetString(
1023 PyExc_IndexError,
1024 "child index out of range"
1025 );
1026 return NULL;
1027 }
1028
1029 Py_INCREF(self->extra->children[index]);
1030 return self->extra->children[index];
1031}
1032
1033static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001034element_insert(ElementObject* self, PyObject* args)
1035{
1036 int i;
1037
1038 int index;
1039 PyObject* element;
1040 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1041 &Element_Type, &element))
1042 return NULL;
1043
1044 if (!self->extra)
1045 element_new_extra(self, NULL);
1046
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001047 if (index < 0) {
1048 index += self->extra->length;
1049 if (index < 0)
1050 index = 0;
1051 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001052 if (index > self->extra->length)
1053 index = self->extra->length;
1054
1055 if (element_resize(self, 1) < 0)
1056 return NULL;
1057
1058 for (i = self->extra->length; i > index; i--)
1059 self->extra->children[i] = self->extra->children[i-1];
1060
1061 Py_INCREF(element);
1062 self->extra->children[index] = element;
1063
1064 self->extra->length++;
1065
1066 Py_RETURN_NONE;
1067}
1068
1069static PyObject*
1070element_items(ElementObject* self, PyObject* args)
1071{
1072 if (!PyArg_ParseTuple(args, ":items"))
1073 return NULL;
1074
1075 if (!self->extra || self->extra->attrib == Py_None)
1076 return PyList_New(0);
1077
1078 return PyDict_Items(self->extra->attrib);
1079}
1080
1081static PyObject*
1082element_keys(ElementObject* self, PyObject* args)
1083{
1084 if (!PyArg_ParseTuple(args, ":keys"))
1085 return NULL;
1086
1087 if (!self->extra || self->extra->attrib == Py_None)
1088 return PyList_New(0);
1089
1090 return PyDict_Keys(self->extra->attrib);
1091}
1092
Martin v. Löwis18e16552006-02-15 17:27:45 +00001093static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001094element_length(ElementObject* self)
1095{
1096 if (!self->extra)
1097 return 0;
1098
1099 return self->extra->length;
1100}
1101
1102static PyObject*
1103element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1104{
1105 PyObject* elem;
1106
1107 PyObject* tag;
1108 PyObject* attrib;
1109 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1110 return NULL;
1111
1112 attrib = PyDict_Copy(attrib);
1113 if (!attrib)
1114 return NULL;
1115
1116 elem = element_new(tag, attrib);
1117
1118 Py_DECREF(attrib);
1119
1120 return elem;
1121}
1122
1123static PyObject*
1124element_reduce(ElementObject* self, PyObject* args)
1125{
1126 if (!PyArg_ParseTuple(args, ":__reduce__"))
1127 return NULL;
1128
1129 /* Hack alert: This method is used to work around a __copy__
1130 problem on certain 2.3 and 2.4 versions. To save time and
1131 simplify the code, we create the copy in here, and use a dummy
1132 copyelement helper to trick the copy module into doing the
1133 right thing. */
1134
1135 if (!elementtree_copyelement_obj) {
1136 PyErr_SetString(
1137 PyExc_RuntimeError,
1138 "copyelement helper not found"
1139 );
1140 return NULL;
1141 }
1142
1143 return Py_BuildValue(
1144 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1145 );
1146}
1147
1148static PyObject*
1149element_remove(ElementObject* self, PyObject* args)
1150{
1151 int i;
1152
1153 PyObject* element;
1154 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1155 return NULL;
1156
1157 if (!self->extra) {
1158 /* element has no children, so raise exception */
1159 PyErr_SetString(
1160 PyExc_ValueError,
1161 "list.remove(x): x not in list"
1162 );
1163 return NULL;
1164 }
1165
1166 for (i = 0; i < self->extra->length; i++) {
1167 if (self->extra->children[i] == element)
1168 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001169 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001170 break;
1171 }
1172
1173 if (i == self->extra->length) {
1174 /* element is not in children, so raise exception */
1175 PyErr_SetString(
1176 PyExc_ValueError,
1177 "list.remove(x): x not in list"
1178 );
1179 return NULL;
1180 }
1181
1182 Py_DECREF(self->extra->children[i]);
1183
1184 self->extra->length--;
1185
1186 for (; i < self->extra->length; i++)
1187 self->extra->children[i] = self->extra->children[i+1];
1188
1189 Py_RETURN_NONE;
1190}
1191
1192static PyObject*
1193element_repr(ElementObject* self)
1194{
Walter Dörwald7569dfe2007-05-19 21:49:49 +00001195 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001196}
1197
1198static PyObject*
1199element_set(ElementObject* self, PyObject* args)
1200{
1201 PyObject* attrib;
1202
1203 PyObject* key;
1204 PyObject* value;
1205 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1206 return NULL;
1207
1208 if (!self->extra)
1209 element_new_extra(self, NULL);
1210
1211 attrib = element_get_attrib(self);
1212 if (!attrib)
1213 return NULL;
1214
1215 if (PyDict_SetItem(attrib, key, value) < 0)
1216 return NULL;
1217
1218 Py_RETURN_NONE;
1219}
1220
1221static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001222element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001223{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001224 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001225 int i;
1226 PyObject* old;
1227
1228 if (!self->extra || index < 0 || index >= self->extra->length) {
1229 PyErr_SetString(
1230 PyExc_IndexError,
1231 "child assignment index out of range");
1232 return -1;
1233 }
1234
1235 old = self->extra->children[index];
1236
1237 if (item) {
1238 Py_INCREF(item);
1239 self->extra->children[index] = item;
1240 } else {
1241 self->extra->length--;
1242 for (i = index; i < self->extra->length; i++)
1243 self->extra->children[i] = self->extra->children[i+1];
1244 }
1245
1246 Py_DECREF(old);
1247
1248 return 0;
1249}
1250
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001251static PyObject*
1252element_subscr(PyObject* self_, PyObject* item)
1253{
1254 ElementObject* self = (ElementObject*) self_;
1255
1256#if (PY_VERSION_HEX < 0x02050000)
1257 if (PyInt_Check(item) || PyLong_Check(item)) {
1258 long i = PyInt_AsLong(item);
1259#else
1260 if (PyIndex_Check(item)) {
1261 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1262#endif
1263
1264 if (i == -1 && PyErr_Occurred()) {
1265 return NULL;
1266 }
1267 if (i < 0 && self->extra)
1268 i += self->extra->length;
1269 return element_getitem(self_, i);
1270 }
1271 else if (PySlice_Check(item)) {
1272 Py_ssize_t start, stop, step, slicelen, cur, i;
1273 PyObject* list;
1274
1275 if (!self->extra)
1276 return PyList_New(0);
1277
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001278 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001279 self->extra->length,
1280 &start, &stop, &step, &slicelen) < 0) {
1281 return NULL;
1282 }
1283
1284 if (slicelen <= 0)
1285 return PyList_New(0);
1286 else {
1287 list = PyList_New(slicelen);
1288 if (!list)
1289 return NULL;
1290
1291 for (cur = start, i = 0; i < slicelen;
1292 cur += step, i++) {
1293 PyObject* item = self->extra->children[cur];
1294 Py_INCREF(item);
1295 PyList_SET_ITEM(list, i, item);
1296 }
1297
1298 return list;
1299 }
1300 }
1301 else {
1302 PyErr_SetString(PyExc_TypeError,
1303 "element indices must be integers");
1304 return NULL;
1305 }
1306}
1307
1308static int
1309element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1310{
1311 ElementObject* self = (ElementObject*) self_;
1312
1313#if (PY_VERSION_HEX < 0x02050000)
1314 if (PyInt_Check(item) || PyLong_Check(item)) {
1315 long i = PyInt_AsLong(item);
1316#else
1317 if (PyIndex_Check(item)) {
1318 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1319#endif
1320
1321 if (i == -1 && PyErr_Occurred()) {
1322 return -1;
1323 }
1324 if (i < 0 && self->extra)
1325 i += self->extra->length;
1326 return element_setitem(self_, i, value);
1327 }
1328 else if (PySlice_Check(item)) {
1329 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1330
1331 PyObject* recycle = NULL;
1332 PyObject* seq = NULL;
1333
1334 if (!self->extra)
1335 element_new_extra(self, NULL);
1336
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001337 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001338 self->extra->length,
1339 &start, &stop, &step, &slicelen) < 0) {
1340 return -1;
1341 }
1342
1343 if (value == NULL)
1344 newlen = 0;
1345 else {
1346 seq = PySequence_Fast(value, "");
1347 if (!seq) {
1348 PyErr_Format(
1349 PyExc_TypeError,
1350 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1351 );
1352 return -1;
1353 }
1354 newlen = PySequence_Size(seq);
1355 }
1356
1357 if (step != 1 && newlen != slicelen)
1358 {
1359 PyErr_Format(PyExc_ValueError,
1360#if (PY_VERSION_HEX < 0x02050000)
1361 "attempt to assign sequence of size %d "
1362 "to extended slice of size %d",
1363#else
1364 "attempt to assign sequence of size %zd "
1365 "to extended slice of size %zd",
1366#endif
1367 newlen, slicelen
1368 );
1369 return -1;
1370 }
1371
1372
1373 /* Resize before creating the recycle bin, to prevent refleaks. */
1374 if (newlen > slicelen) {
1375 if (element_resize(self, newlen - slicelen) < 0) {
1376 if (seq) {
1377 Py_DECREF(seq);
1378 }
1379 return -1;
1380 }
1381 }
1382
1383 if (slicelen > 0) {
1384 /* to avoid recursive calls to this method (via decref), move
1385 old items to the recycle bin here, and get rid of them when
1386 we're done modifying the element */
1387 recycle = PyList_New(slicelen);
1388 if (!recycle) {
1389 if (seq) {
1390 Py_DECREF(seq);
1391 }
1392 return -1;
1393 }
1394 for (cur = start, i = 0; i < slicelen;
1395 cur += step, i++)
1396 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1397 }
1398
1399 if (newlen < slicelen) {
1400 /* delete slice */
1401 for (i = stop; i < self->extra->length; i++)
1402 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1403 } else if (newlen > slicelen) {
1404 /* insert slice */
1405 for (i = self->extra->length-1; i >= stop; i--)
1406 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1407 }
1408
1409 /* replace the slice */
1410 for (cur = start, i = 0; i < newlen;
1411 cur += step, i++) {
1412 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1413 Py_INCREF(element);
1414 self->extra->children[cur] = element;
1415 }
1416
1417 self->extra->length += newlen - slicelen;
1418
1419 if (seq) {
1420 Py_DECREF(seq);
1421 }
1422
1423 /* discard the recycle bin, and everything in it */
1424 Py_XDECREF(recycle);
1425
1426 return 0;
1427 }
1428 else {
1429 PyErr_SetString(PyExc_TypeError,
1430 "element indices must be integers");
1431 return -1;
1432 }
1433}
1434
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001435static PyMethodDef element_methods[] = {
1436
1437 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1438
1439 {"get", (PyCFunction) element_get, METH_VARARGS},
1440 {"set", (PyCFunction) element_set, METH_VARARGS},
1441
1442 {"find", (PyCFunction) element_find, METH_VARARGS},
1443 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1444 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1445
1446 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001447 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001448 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1449 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1450
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001451 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1452 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1453 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1454
1455 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001456 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1457
1458 {"items", (PyCFunction) element_items, METH_VARARGS},
1459 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1460
1461 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1462
1463 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1464 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1465
1466 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1467 C objects correctly, so we have to fake it using a __reduce__-
1468 based hack (see the element_reduce implementation above for
1469 details). */
1470
1471 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1472 using a runtime test to figure out if we need to fake things
1473 or now (see the init code below). The following entry is
1474 enabled only if the hack is needed. */
1475
1476 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1477
1478 {NULL, NULL}
1479};
1480
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001481static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001482element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001483{
1484 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001485 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001486
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001487 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001488 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001489
Alexander Belopolskye239d232010-12-08 23:31:48 +00001490 if (name == NULL)
1491 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001492
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001493 /* handle common attributes first */
1494 if (strcmp(name, "tag") == 0) {
1495 res = self->tag;
1496 Py_INCREF(res);
1497 return res;
1498 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001499 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001500 Py_INCREF(res);
1501 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001502 }
1503
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001504 /* methods */
1505 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1506 if (res)
1507 return res;
1508
1509 /* less common attributes */
1510 if (strcmp(name, "tail") == 0) {
1511 PyErr_Clear();
1512 res = element_get_tail(self);
1513 } else if (strcmp(name, "attrib") == 0) {
1514 PyErr_Clear();
1515 if (!self->extra)
1516 element_new_extra(self, NULL);
1517 res = element_get_attrib(self);
1518 }
1519
1520 if (!res)
1521 return NULL;
1522
1523 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001524 return res;
1525}
1526
1527static int
1528element_setattr(ElementObject* self, const char* name, PyObject* value)
1529{
1530 if (value == NULL) {
1531 PyErr_SetString(
1532 PyExc_AttributeError,
1533 "can't delete element attributes"
1534 );
1535 return -1;
1536 }
1537
1538 if (strcmp(name, "tag") == 0) {
1539 Py_DECREF(self->tag);
1540 self->tag = value;
1541 Py_INCREF(self->tag);
1542 } else if (strcmp(name, "text") == 0) {
1543 Py_DECREF(JOIN_OBJ(self->text));
1544 self->text = value;
1545 Py_INCREF(self->text);
1546 } else if (strcmp(name, "tail") == 0) {
1547 Py_DECREF(JOIN_OBJ(self->tail));
1548 self->tail = value;
1549 Py_INCREF(self->tail);
1550 } else if (strcmp(name, "attrib") == 0) {
1551 if (!self->extra)
1552 element_new_extra(self, NULL);
1553 Py_DECREF(self->extra->attrib);
1554 self->extra->attrib = value;
1555 Py_INCREF(self->extra->attrib);
1556 } else {
1557 PyErr_SetString(PyExc_AttributeError, name);
1558 return -1;
1559 }
1560
1561 return 0;
1562}
1563
1564static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001565 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001566 0, /* sq_concat */
1567 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001568 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001569 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001570 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001571 0,
1572};
1573
1574static PyMappingMethods element_as_mapping = {
1575 (lenfunc) element_length,
1576 (binaryfunc) element_subscr,
1577 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001578};
1579
Neal Norwitz227b5332006-03-22 09:28:35 +00001580static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001581 PyVarObject_HEAD_INIT(NULL, 0)
1582 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001583 /* methods */
1584 (destructor)element_dealloc, /* tp_dealloc */
1585 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001586 0, /* tp_getattr */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001587 (setattrfunc)element_setattr, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00001588 0, /* tp_reserved */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001589 (reprfunc)element_repr, /* tp_repr */
1590 0, /* tp_as_number */
1591 &element_as_sequence, /* tp_as_sequence */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001592 &element_as_mapping, /* tp_as_mapping */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001593 0, /* tp_hash */
1594 0, /* tp_call */
1595 0, /* tp_str */
1596 (getattrofunc)element_getattro, /* tp_getattro */
1597 0, /* tp_setattro */
1598 0, /* tp_as_buffer */
1599 Py_TPFLAGS_DEFAULT, /* tp_flags */
1600 0, /* tp_doc */
1601 0, /* tp_traverse */
1602 0, /* tp_clear */
1603 0, /* tp_richcompare */
1604 0, /* tp_weaklistoffset */
1605 0, /* tp_iter */
1606 0, /* tp_iternext */
1607 element_methods, /* tp_methods */
1608 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001609};
1610
1611/* ==================================================================== */
1612/* the tree builder type */
1613
1614typedef struct {
1615 PyObject_HEAD
1616
1617 PyObject* root; /* root node (first created node) */
1618
1619 ElementObject* this; /* current node */
1620 ElementObject* last; /* most recently created node */
1621
1622 PyObject* data; /* data collector (string or list), or NULL */
1623
1624 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001625 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001626
1627 /* element tracing */
1628 PyObject* events; /* list of events, or NULL if not collecting */
1629 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1630 PyObject* end_event_obj;
1631 PyObject* start_ns_event_obj;
1632 PyObject* end_ns_event_obj;
1633
1634} TreeBuilderObject;
1635
Neal Norwitz227b5332006-03-22 09:28:35 +00001636static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001637
Christian Heimes90aa7642007-12-19 02:45:37 +00001638#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001639
1640/* -------------------------------------------------------------------- */
1641/* constructor and destructor */
1642
1643LOCAL(PyObject*)
1644treebuilder_new(void)
1645{
1646 TreeBuilderObject* self;
1647
1648 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1649 if (self == NULL)
1650 return NULL;
1651
1652 self->root = NULL;
1653
1654 Py_INCREF(Py_None);
1655 self->this = (ElementObject*) Py_None;
1656
1657 Py_INCREF(Py_None);
1658 self->last = (ElementObject*) Py_None;
1659
1660 self->data = NULL;
1661
1662 self->stack = PyList_New(20);
1663 self->index = 0;
1664
1665 self->events = NULL;
1666 self->start_event_obj = self->end_event_obj = NULL;
1667 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1668
1669 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1670
1671 return (PyObject*) self;
1672}
1673
1674static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001675treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001676{
1677 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1678 return NULL;
1679
1680 return treebuilder_new();
1681}
1682
1683static void
1684treebuilder_dealloc(TreeBuilderObject* self)
1685{
1686 Py_XDECREF(self->end_ns_event_obj);
1687 Py_XDECREF(self->start_ns_event_obj);
1688 Py_XDECREF(self->end_event_obj);
1689 Py_XDECREF(self->start_event_obj);
1690 Py_XDECREF(self->events);
1691 Py_DECREF(self->stack);
1692 Py_XDECREF(self->data);
1693 Py_DECREF(self->last);
1694 Py_DECREF(self->this);
1695 Py_XDECREF(self->root);
1696
1697 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1698
1699 PyObject_Del(self);
1700}
1701
1702/* -------------------------------------------------------------------- */
1703/* handlers */
1704
1705LOCAL(PyObject*)
1706treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1707 PyObject* standalone)
1708{
1709 Py_RETURN_NONE;
1710}
1711
1712LOCAL(PyObject*)
1713treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1714 PyObject* attrib)
1715{
1716 PyObject* node;
1717 PyObject* this;
1718
1719 if (self->data) {
1720 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001721 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001722 self->last->text = JOIN_SET(
1723 self->data, PyList_CheckExact(self->data)
1724 );
1725 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001726 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001727 self->last->tail = JOIN_SET(
1728 self->data, PyList_CheckExact(self->data)
1729 );
1730 }
1731 self->data = NULL;
1732 }
1733
1734 node = element_new(tag, attrib);
1735 if (!node)
1736 return NULL;
1737
1738 this = (PyObject*) self->this;
1739
1740 if (this != Py_None) {
1741 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001742 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001743 } else {
1744 if (self->root) {
1745 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001746 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001747 "multiple elements on top level"
1748 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001749 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001750 }
1751 Py_INCREF(node);
1752 self->root = node;
1753 }
1754
1755 if (self->index < PyList_GET_SIZE(self->stack)) {
1756 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001757 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001758 Py_INCREF(this);
1759 } else {
1760 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001761 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001762 }
1763 self->index++;
1764
1765 Py_DECREF(this);
1766 Py_INCREF(node);
1767 self->this = (ElementObject*) node;
1768
1769 Py_DECREF(self->last);
1770 Py_INCREF(node);
1771 self->last = (ElementObject*) node;
1772
1773 if (self->start_event_obj) {
1774 PyObject* res;
1775 PyObject* action = self->start_event_obj;
1776 res = PyTuple_New(2);
1777 if (res) {
1778 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1779 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1780 PyList_Append(self->events, res);
1781 Py_DECREF(res);
1782 } else
1783 PyErr_Clear(); /* FIXME: propagate error */
1784 }
1785
1786 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001787
1788 error:
1789 Py_DECREF(node);
1790 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001791}
1792
1793LOCAL(PyObject*)
1794treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1795{
1796 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001797 if (self->last == (ElementObject*) Py_None) {
1798 /* ignore calls to data before the first call to start */
1799 Py_RETURN_NONE;
1800 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001801 /* store the first item as is */
1802 Py_INCREF(data); self->data = data;
1803 } else {
1804 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001805 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1806 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001807 /* expat often generates single character data sections; handle
1808 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001809 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1810 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001811 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001812 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001813 } else if (PyList_CheckExact(self->data)) {
1814 if (PyList_Append(self->data, data) < 0)
1815 return NULL;
1816 } else {
1817 PyObject* list = PyList_New(2);
1818 if (!list)
1819 return NULL;
1820 PyList_SET_ITEM(list, 0, self->data);
1821 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1822 self->data = list;
1823 }
1824 }
1825
1826 Py_RETURN_NONE;
1827}
1828
1829LOCAL(PyObject*)
1830treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1831{
1832 PyObject* item;
1833
1834 if (self->data) {
1835 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001836 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001837 self->last->text = JOIN_SET(
1838 self->data, PyList_CheckExact(self->data)
1839 );
1840 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001841 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001842 self->last->tail = JOIN_SET(
1843 self->data, PyList_CheckExact(self->data)
1844 );
1845 }
1846 self->data = NULL;
1847 }
1848
1849 if (self->index == 0) {
1850 PyErr_SetString(
1851 PyExc_IndexError,
1852 "pop from empty stack"
1853 );
1854 return NULL;
1855 }
1856
1857 self->index--;
1858
1859 item = PyList_GET_ITEM(self->stack, self->index);
1860 Py_INCREF(item);
1861
1862 Py_DECREF(self->last);
1863
1864 self->last = (ElementObject*) self->this;
1865 self->this = (ElementObject*) item;
1866
1867 if (self->end_event_obj) {
1868 PyObject* res;
1869 PyObject* action = self->end_event_obj;
1870 PyObject* node = (PyObject*) self->last;
1871 res = PyTuple_New(2);
1872 if (res) {
1873 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1874 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1875 PyList_Append(self->events, res);
1876 Py_DECREF(res);
1877 } else
1878 PyErr_Clear(); /* FIXME: propagate error */
1879 }
1880
1881 Py_INCREF(self->last);
1882 return (PyObject*) self->last;
1883}
1884
1885LOCAL(void)
1886treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001887 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001888{
1889 PyObject* res;
1890 PyObject* action;
1891 PyObject* parcel;
1892
1893 if (!self->events)
1894 return;
1895
1896 if (start) {
1897 if (!self->start_ns_event_obj)
1898 return;
1899 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001900 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001901 if (!parcel)
1902 return;
1903 Py_INCREF(action);
1904 } else {
1905 if (!self->end_ns_event_obj)
1906 return;
1907 action = self->end_ns_event_obj;
1908 Py_INCREF(action);
1909 parcel = Py_None;
1910 Py_INCREF(parcel);
1911 }
1912
1913 res = PyTuple_New(2);
1914
1915 if (res) {
1916 PyTuple_SET_ITEM(res, 0, action);
1917 PyTuple_SET_ITEM(res, 1, parcel);
1918 PyList_Append(self->events, res);
1919 Py_DECREF(res);
1920 } else
1921 PyErr_Clear(); /* FIXME: propagate error */
1922}
1923
1924/* -------------------------------------------------------------------- */
1925/* methods (in alphabetical order) */
1926
1927static PyObject*
1928treebuilder_data(TreeBuilderObject* self, PyObject* args)
1929{
1930 PyObject* data;
1931 if (!PyArg_ParseTuple(args, "O:data", &data))
1932 return NULL;
1933
1934 return treebuilder_handle_data(self, data);
1935}
1936
1937static PyObject*
1938treebuilder_end(TreeBuilderObject* self, PyObject* args)
1939{
1940 PyObject* tag;
1941 if (!PyArg_ParseTuple(args, "O:end", &tag))
1942 return NULL;
1943
1944 return treebuilder_handle_end(self, tag);
1945}
1946
1947LOCAL(PyObject*)
1948treebuilder_done(TreeBuilderObject* self)
1949{
1950 PyObject* res;
1951
1952 /* FIXME: check stack size? */
1953
1954 if (self->root)
1955 res = self->root;
1956 else
1957 res = Py_None;
1958
1959 Py_INCREF(res);
1960 return res;
1961}
1962
1963static PyObject*
1964treebuilder_close(TreeBuilderObject* self, PyObject* args)
1965{
1966 if (!PyArg_ParseTuple(args, ":close"))
1967 return NULL;
1968
1969 return treebuilder_done(self);
1970}
1971
1972static PyObject*
1973treebuilder_start(TreeBuilderObject* self, PyObject* args)
1974{
1975 PyObject* tag;
1976 PyObject* attrib = Py_None;
1977 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1978 return NULL;
1979
1980 return treebuilder_handle_start(self, tag, attrib);
1981}
1982
1983static PyObject*
1984treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1985{
1986 PyObject* encoding;
1987 PyObject* standalone;
1988 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1989 return NULL;
1990
1991 return treebuilder_handle_xml(self, encoding, standalone);
1992}
1993
1994static PyMethodDef treebuilder_methods[] = {
1995 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1996 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1997 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1998 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1999 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2000 {NULL, NULL}
2001};
2002
Neal Norwitz227b5332006-03-22 09:28:35 +00002003static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002004 PyVarObject_HEAD_INIT(NULL, 0)
2005 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002006 /* methods */
2007 (destructor)treebuilder_dealloc, /* tp_dealloc */
2008 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002009 0, /* tp_getattr */
2010 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002011 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002012 0, /* tp_repr */
2013 0, /* tp_as_number */
2014 0, /* tp_as_sequence */
2015 0, /* tp_as_mapping */
2016 0, /* tp_hash */
2017 0, /* tp_call */
2018 0, /* tp_str */
2019 0, /* tp_getattro */
2020 0, /* tp_setattro */
2021 0, /* tp_as_buffer */
2022 Py_TPFLAGS_DEFAULT, /* tp_flags */
2023 0, /* tp_doc */
2024 0, /* tp_traverse */
2025 0, /* tp_clear */
2026 0, /* tp_richcompare */
2027 0, /* tp_weaklistoffset */
2028 0, /* tp_iter */
2029 0, /* tp_iternext */
2030 treebuilder_methods, /* tp_methods */
2031 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002032};
2033
2034/* ==================================================================== */
2035/* the expat interface */
2036
2037#if defined(USE_EXPAT)
2038
2039#include "expat.h"
2040
/* EXPAT(func) names an expat entry point: a member of the pyexpat C API
   table when USE_PYEXPAT_CAPI is defined, the XML_-prefixed symbol
   otherwise. */
#ifdef USE_PYEXPAT_CAPI
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
2048
2049typedef struct {
2050 PyObject_HEAD
2051
2052 XML_Parser parser;
2053
2054 PyObject* target;
2055 PyObject* entity;
2056
2057 PyObject* names;
2058
2059 PyObject* handle_xml;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002060
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002061 PyObject* handle_start;
2062 PyObject* handle_data;
2063 PyObject* handle_end;
2064
2065 PyObject* handle_comment;
2066 PyObject* handle_pi;
2067
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002068 PyObject* handle_close;
2069
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002070} XMLParserObject;
2071
Neal Norwitz227b5332006-03-22 09:28:35 +00002072static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002073
2074/* helpers */
2075
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002076LOCAL(PyObject*)
2077makeuniversal(XMLParserObject* self, const char* string)
2078{
2079 /* convert a UTF-8 tag/attribute name from the expat parser
2080 to a universal name string */
2081
2082 int size = strlen(string);
2083 PyObject* key;
2084 PyObject* value;
2085
2086 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002087 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002088 if (!key)
2089 return NULL;
2090
2091 value = PyDict_GetItem(self->names, key);
2092
2093 if (value) {
2094 Py_INCREF(value);
2095 } else {
2096 /* new name. convert to universal name, and decode as
2097 necessary */
2098
2099 PyObject* tag;
2100 char* p;
2101 int i;
2102
2103 /* look for namespace separator */
2104 for (i = 0; i < size; i++)
2105 if (string[i] == '}')
2106 break;
2107 if (i != size) {
2108 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002109 tag = PyBytes_FromStringAndSize(NULL, size+1);
2110 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002111 p[0] = '{';
2112 memcpy(p+1, string, size);
2113 size++;
2114 } else {
2115 /* plain name; use key as tag */
2116 Py_INCREF(key);
2117 tag = key;
2118 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002119
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002120 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002121 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002122 value = PyUnicode_DecodeUTF8(p, size, "strict");
2123 Py_DECREF(tag);
2124 if (!value) {
2125 Py_DECREF(key);
2126 return NULL;
2127 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002128
2129 /* add to names dictionary */
2130 if (PyDict_SetItem(self->names, key, value) < 0) {
2131 Py_DECREF(key);
2132 Py_DECREF(value);
2133 return NULL;
2134 }
2135 }
2136
2137 Py_DECREF(key);
2138 return value;
2139}
2140
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002141static void
2142expat_set_error(const char* message, int line, int column)
2143{
Victor Stinner499dfcf2011-03-21 13:26:24 +01002144 PyObject *errmsg, *error, *position;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002145
Victor Stinner499dfcf2011-03-21 13:26:24 +01002146 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
2147 message, line, column);
2148 if (errmsg == NULL)
2149 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002150
Victor Stinner499dfcf2011-03-21 13:26:24 +01002151 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2152 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002153 if (!error)
2154 return;
2155
2156 /* add position attribute */
2157 position = Py_BuildValue("(ii)", line, column);
2158 if (!position) {
2159 Py_DECREF(error);
2160 return;
2161 }
2162 if (PyObject_SetAttrString(error, "position", position) == -1) {
2163 Py_DECREF(error);
2164 Py_DECREF(position);
2165 return;
2166 }
2167 Py_DECREF(position);
2168
2169 PyErr_SetObject(elementtree_parseerror_obj, error);
2170 Py_DECREF(error);
2171}
2172
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002173/* -------------------------------------------------------------------- */
2174/* handlers */
2175
2176static void
2177expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2178 int data_len)
2179{
2180 PyObject* key;
2181 PyObject* value;
2182 PyObject* res;
2183
2184 if (data_len < 2 || data_in[0] != '&')
2185 return;
2186
Neal Norwitz0269b912007-08-08 06:56:02 +00002187 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002188 if (!key)
2189 return;
2190
2191 value = PyDict_GetItem(self->entity, key);
2192
2193 if (value) {
2194 if (TreeBuilder_CheckExact(self->target))
2195 res = treebuilder_handle_data(
2196 (TreeBuilderObject*) self->target, value
2197 );
2198 else if (self->handle_data)
2199 res = PyObject_CallFunction(self->handle_data, "O", value);
2200 else
2201 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002202 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002203 } else if (!PyErr_Occurred()) {
2204 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002205 char message[128] = "undefined entity ";
2206 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002207 expat_set_error(
2208 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002209 EXPAT(GetErrorLineNumber)(self->parser),
2210 EXPAT(GetErrorColumnNumber)(self->parser)
2211 );
2212 }
2213
2214 Py_DECREF(key);
2215}
2216
2217static void
2218expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2219 const XML_Char **attrib_in)
2220{
2221 PyObject* res;
2222 PyObject* tag;
2223 PyObject* attrib;
2224 int ok;
2225
2226 /* tag name */
2227 tag = makeuniversal(self, tag_in);
2228 if (!tag)
2229 return; /* parser will look for errors */
2230
2231 /* attributes */
2232 if (attrib_in[0]) {
2233 attrib = PyDict_New();
2234 if (!attrib)
2235 return;
2236 while (attrib_in[0] && attrib_in[1]) {
2237 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002238 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002239 if (!key || !value) {
2240 Py_XDECREF(value);
2241 Py_XDECREF(key);
2242 Py_DECREF(attrib);
2243 return;
2244 }
2245 ok = PyDict_SetItem(attrib, key, value);
2246 Py_DECREF(value);
2247 Py_DECREF(key);
2248 if (ok < 0) {
2249 Py_DECREF(attrib);
2250 return;
2251 }
2252 attrib_in += 2;
2253 }
2254 } else {
2255 Py_INCREF(Py_None);
2256 attrib = Py_None;
2257 }
2258
2259 if (TreeBuilder_CheckExact(self->target))
2260 /* shortcut */
2261 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2262 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002263 else if (self->handle_start) {
2264 if (attrib == Py_None) {
2265 Py_DECREF(attrib);
2266 attrib = PyDict_New();
2267 if (!attrib)
2268 return;
2269 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002270 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002271 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002272 res = NULL;
2273
2274 Py_DECREF(tag);
2275 Py_DECREF(attrib);
2276
2277 Py_XDECREF(res);
2278}
2279
2280static void
2281expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2282 int data_len)
2283{
2284 PyObject* data;
2285 PyObject* res;
2286
Neal Norwitz0269b912007-08-08 06:56:02 +00002287 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002288 if (!data)
2289 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002290
2291 if (TreeBuilder_CheckExact(self->target))
2292 /* shortcut */
2293 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2294 else if (self->handle_data)
2295 res = PyObject_CallFunction(self->handle_data, "O", data);
2296 else
2297 res = NULL;
2298
2299 Py_DECREF(data);
2300
2301 Py_XDECREF(res);
2302}
2303
2304static void
2305expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2306{
2307 PyObject* tag;
2308 PyObject* res = NULL;
2309
2310 if (TreeBuilder_CheckExact(self->target))
2311 /* shortcut */
2312 /* the standard tree builder doesn't look at the end tag */
2313 res = treebuilder_handle_end(
2314 (TreeBuilderObject*) self->target, Py_None
2315 );
2316 else if (self->handle_end) {
2317 tag = makeuniversal(self, tag_in);
2318 if (tag) {
2319 res = PyObject_CallFunction(self->handle_end, "O", tag);
2320 Py_DECREF(tag);
2321 }
2322 }
2323
2324 Py_XDECREF(res);
2325}
2326
2327static void
2328expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2329 const XML_Char *uri)
2330{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002331 PyObject* sprefix = NULL;
2332 PyObject* suri = NULL;
2333
2334 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2335 if (!suri)
2336 return;
2337
2338 if (prefix)
2339 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2340 else
2341 sprefix = PyUnicode_FromString("");
2342 if (!sprefix) {
2343 Py_DECREF(suri);
2344 return;
2345 }
2346
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002347 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002348 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002349 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002350
2351 Py_DECREF(sprefix);
2352 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002353}
2354
2355static void
2356expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2357{
2358 treebuilder_handle_namespace(
2359 (TreeBuilderObject*) self->target, 0, NULL, NULL
2360 );
2361}
2362
2363static void
2364expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2365{
2366 PyObject* comment;
2367 PyObject* res;
2368
2369 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002370 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002371 if (comment) {
2372 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2373 Py_XDECREF(res);
2374 Py_DECREF(comment);
2375 }
2376 }
2377}
2378
2379static void
2380expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2381 const XML_Char* data_in)
2382{
2383 PyObject* target;
2384 PyObject* data;
2385 PyObject* res;
2386
2387 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002388 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2389 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002390 if (target && data) {
2391 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2392 Py_XDECREF(res);
2393 Py_DECREF(data);
2394 Py_DECREF(target);
2395 } else {
2396 Py_XDECREF(data);
2397 Py_XDECREF(target);
2398 }
2399 }
2400}
2401
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002402static int
2403expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2404 XML_Encoding *info)
2405{
2406 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002407 unsigned char s[256];
2408 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002409 void *data;
2410 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002411
2412 memset(info, 0, sizeof(XML_Encoding));
2413
2414 for (i = 0; i < 256; i++)
2415 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002416
Fredrik Lundhc3389992005-12-25 11:40:19 +00002417 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002418 if (!u)
2419 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002420 if (PyUnicode_READY(u))
2421 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002422
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002423 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002424 Py_DECREF(u);
2425 return XML_STATUS_ERROR;
2426 }
2427
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002428 kind = PyUnicode_KIND(u);
2429 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002430 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002431 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2432 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2433 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002434 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002435 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002436 }
2437
2438 Py_DECREF(u);
2439
2440 return XML_STATUS_OK;
2441}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002442
2443/* -------------------------------------------------------------------- */
2444/* constructor and destructor */
2445
2446static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002447xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002448{
2449 XMLParserObject* self;
2450 /* FIXME: does this need to be static? */
2451 static XML_Memory_Handling_Suite memory_handler;
2452
2453 PyObject* target = NULL;
2454 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002455 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002456 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2457 &target, &encoding))
2458 return NULL;
2459
2460#if defined(USE_PYEXPAT_CAPI)
2461 if (!expat_capi) {
2462 PyErr_SetString(
2463 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2464 );
2465 return NULL;
2466 }
2467#endif
2468
2469 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2470 if (self == NULL)
2471 return NULL;
2472
2473 self->entity = PyDict_New();
2474 if (!self->entity) {
2475 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002476 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002477 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002478
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002479 self->names = PyDict_New();
2480 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002481 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002482 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002483 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002484 }
2485
2486 memory_handler.malloc_fcn = PyObject_Malloc;
2487 memory_handler.realloc_fcn = PyObject_Realloc;
2488 memory_handler.free_fcn = PyObject_Free;
2489
2490 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2491 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002492 PyObject_Del(self->names);
2493 PyObject_Del(self->entity);
2494 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002495 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002496 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002497 }
2498
2499 /* setup target handlers */
2500 if (!target) {
2501 target = treebuilder_new();
2502 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002503 EXPAT(ParserFree)(self->parser);
2504 PyObject_Del(self->names);
2505 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002506 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002507 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002508 }
2509 } else
2510 Py_INCREF(target);
2511 self->target = target;
2512
2513 self->handle_xml = PyObject_GetAttrString(target, "xml");
2514 self->handle_start = PyObject_GetAttrString(target, "start");
2515 self->handle_data = PyObject_GetAttrString(target, "data");
2516 self->handle_end = PyObject_GetAttrString(target, "end");
2517 self->handle_comment = PyObject_GetAttrString(target, "comment");
2518 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002519 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002520
2521 PyErr_Clear();
2522
2523 /* configure parser */
2524 EXPAT(SetUserData)(self->parser, self);
2525 EXPAT(SetElementHandler)(
2526 self->parser,
2527 (XML_StartElementHandler) expat_start_handler,
2528 (XML_EndElementHandler) expat_end_handler
2529 );
2530 EXPAT(SetDefaultHandlerExpand)(
2531 self->parser,
2532 (XML_DefaultHandler) expat_default_handler
2533 );
2534 EXPAT(SetCharacterDataHandler)(
2535 self->parser,
2536 (XML_CharacterDataHandler) expat_data_handler
2537 );
2538 if (self->handle_comment)
2539 EXPAT(SetCommentHandler)(
2540 self->parser,
2541 (XML_CommentHandler) expat_comment_handler
2542 );
2543 if (self->handle_pi)
2544 EXPAT(SetProcessingInstructionHandler)(
2545 self->parser,
2546 (XML_ProcessingInstructionHandler) expat_pi_handler
2547 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002548 EXPAT(SetUnknownEncodingHandler)(
2549 self->parser,
2550 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2551 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002552
2553 ALLOC(sizeof(XMLParserObject), "create expatparser");
2554
2555 return (PyObject*) self;
2556}
2557
2558static void
2559xmlparser_dealloc(XMLParserObject* self)
2560{
2561 EXPAT(ParserFree)(self->parser);
2562
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002563 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002564 Py_XDECREF(self->handle_pi);
2565 Py_XDECREF(self->handle_comment);
2566 Py_XDECREF(self->handle_end);
2567 Py_XDECREF(self->handle_data);
2568 Py_XDECREF(self->handle_start);
2569 Py_XDECREF(self->handle_xml);
2570
2571 Py_DECREF(self->target);
2572 Py_DECREF(self->entity);
2573 Py_DECREF(self->names);
2574
2575 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2576
2577 PyObject_Del(self);
2578}
2579
2580/* -------------------------------------------------------------------- */
2581/* methods (in alphabetical order) */
2582
2583LOCAL(PyObject*)
2584expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2585{
2586 int ok;
2587
2588 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2589
2590 if (PyErr_Occurred())
2591 return NULL;
2592
2593 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002594 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002595 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2596 EXPAT(GetErrorLineNumber)(self->parser),
2597 EXPAT(GetErrorColumnNumber)(self->parser)
2598 );
2599 return NULL;
2600 }
2601
2602 Py_RETURN_NONE;
2603}
2604
2605static PyObject*
2606xmlparser_close(XMLParserObject* self, PyObject* args)
2607{
2608 /* end feeding data to parser */
2609
2610 PyObject* res;
2611 if (!PyArg_ParseTuple(args, ":close"))
2612 return NULL;
2613
2614 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002615 if (!res)
2616 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002617
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002618 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002619 Py_DECREF(res);
2620 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002621 } if (self->handle_close) {
2622 Py_DECREF(res);
2623 return PyObject_CallFunction(self->handle_close, "");
2624 } else
2625 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002626}
2627
2628static PyObject*
2629xmlparser_feed(XMLParserObject* self, PyObject* args)
2630{
2631 /* feed data to parser */
2632
2633 char* data;
2634 int data_len;
2635 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2636 return NULL;
2637
2638 return expat_parse(self, data, data_len, 0);
2639}
2640
2641static PyObject*
2642xmlparser_parse(XMLParserObject* self, PyObject* args)
2643{
2644 /* (internal) parse until end of input stream */
2645
2646 PyObject* reader;
2647 PyObject* buffer;
2648 PyObject* res;
2649
2650 PyObject* fileobj;
2651 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2652 return NULL;
2653
2654 reader = PyObject_GetAttrString(fileobj, "read");
2655 if (!reader)
2656 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002657
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002658 /* read from open file object */
2659 for (;;) {
2660
2661 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2662
2663 if (!buffer) {
2664 /* read failed (e.g. due to KeyboardInterrupt) */
2665 Py_DECREF(reader);
2666 return NULL;
2667 }
2668
Christian Heimes72b710a2008-05-26 13:28:38 +00002669 if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002670 Py_DECREF(buffer);
2671 break;
2672 }
2673
2674 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002675 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002676 );
2677
2678 Py_DECREF(buffer);
2679
2680 if (!res) {
2681 Py_DECREF(reader);
2682 return NULL;
2683 }
2684 Py_DECREF(res);
2685
2686 }
2687
2688 Py_DECREF(reader);
2689
2690 res = expat_parse(self, "", 0, 1);
2691
2692 if (res && TreeBuilder_CheckExact(self->target)) {
2693 Py_DECREF(res);
2694 return treebuilder_done((TreeBuilderObject*) self->target);
2695 }
2696
2697 return res;
2698}
2699
2700static PyObject*
2701xmlparser_setevents(XMLParserObject* self, PyObject* args)
2702{
2703 /* activate element event reporting */
2704
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002705 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706 TreeBuilderObject* target;
2707
2708 PyObject* events; /* event collector */
2709 PyObject* event_set = Py_None;
2710 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2711 &event_set))
2712 return NULL;
2713
2714 if (!TreeBuilder_CheckExact(self->target)) {
2715 PyErr_SetString(
2716 PyExc_TypeError,
2717 "event handling only supported for cElementTree.Treebuilder "
2718 "targets"
2719 );
2720 return NULL;
2721 }
2722
2723 target = (TreeBuilderObject*) self->target;
2724
2725 Py_INCREF(events);
2726 Py_XDECREF(target->events);
2727 target->events = events;
2728
2729 /* clear out existing events */
2730 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2731 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2732 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2733 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2734
2735 if (event_set == Py_None) {
2736 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002737 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738 Py_RETURN_NONE;
2739 }
2740
2741 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2742 goto error;
2743
2744 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2745 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2746 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002747 if (PyUnicode_Check(item)) {
2748 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002749 if (event == NULL)
2750 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002751 } else if (PyBytes_Check(item))
2752 event = PyBytes_AS_STRING(item);
2753 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002754 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002755 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002756 if (strcmp(event, "start") == 0) {
2757 Py_INCREF(item);
2758 target->start_event_obj = item;
2759 } else if (strcmp(event, "end") == 0) {
2760 Py_INCREF(item);
2761 Py_XDECREF(target->end_event_obj);
2762 target->end_event_obj = item;
2763 } else if (strcmp(event, "start-ns") == 0) {
2764 Py_INCREF(item);
2765 Py_XDECREF(target->start_ns_event_obj);
2766 target->start_ns_event_obj = item;
2767 EXPAT(SetNamespaceDeclHandler)(
2768 self->parser,
2769 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2770 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2771 );
2772 } else if (strcmp(event, "end-ns") == 0) {
2773 Py_INCREF(item);
2774 Py_XDECREF(target->end_ns_event_obj);
2775 target->end_ns_event_obj = item;
2776 EXPAT(SetNamespaceDeclHandler)(
2777 self->parser,
2778 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2779 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2780 );
2781 } else {
2782 PyErr_Format(
2783 PyExc_ValueError,
2784 "unknown event '%s'", event
2785 );
2786 return NULL;
2787 }
2788 }
2789
2790 Py_RETURN_NONE;
2791
2792 error:
2793 PyErr_SetString(
2794 PyExc_TypeError,
2795 "invalid event tuple"
2796 );
2797 return NULL;
2798}
2799
2800static PyMethodDef xmlparser_methods[] = {
2801 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2802 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2803 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2804 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2805 {NULL, NULL}
2806};
2807
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002808static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002809xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002810{
Alexander Belopolskye239d232010-12-08 23:31:48 +00002811 if (PyUnicode_Check(nameobj)) {
2812 PyObject* res;
2813 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
2814 res = self->entity;
2815 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
2816 res = self->target;
2817 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
2818 return PyUnicode_FromFormat(
2819 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002820 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00002821 }
2822 else
2823 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002824
Alexander Belopolskye239d232010-12-08 23:31:48 +00002825 Py_INCREF(res);
2826 return res;
2827 }
2828 generic:
2829 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002830}
2831
Neal Norwitz227b5332006-03-22 09:28:35 +00002832static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002833 PyVarObject_HEAD_INIT(NULL, 0)
2834 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002835 /* methods */
2836 (destructor)xmlparser_dealloc, /* tp_dealloc */
2837 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002838 0, /* tp_getattr */
2839 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002840 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002841 0, /* tp_repr */
2842 0, /* tp_as_number */
2843 0, /* tp_as_sequence */
2844 0, /* tp_as_mapping */
2845 0, /* tp_hash */
2846 0, /* tp_call */
2847 0, /* tp_str */
2848 (getattrofunc)xmlparser_getattro, /* tp_getattro */
2849 0, /* tp_setattro */
2850 0, /* tp_as_buffer */
2851 Py_TPFLAGS_DEFAULT, /* tp_flags */
2852 0, /* tp_doc */
2853 0, /* tp_traverse */
2854 0, /* tp_clear */
2855 0, /* tp_richcompare */
2856 0, /* tp_weaklistoffset */
2857 0, /* tp_iter */
2858 0, /* tp_iternext */
2859 xmlparser_methods, /* tp_methods */
2860 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002861};
2862
2863#endif
2864
2865/* ==================================================================== */
2866/* python module interface */
2867
2868static PyMethodDef _functions[] = {
2869 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2870 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2871 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2872#if defined(USE_EXPAT)
2873 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2874 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2875#endif
2876 {NULL, NULL}
2877};
2878
Martin v. Löwis1a214512008-06-11 05:26:20 +00002879
2880static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002881 PyModuleDef_HEAD_INIT,
2882 "_elementtree",
2883 NULL,
2884 -1,
2885 _functions,
2886 NULL,
2887 NULL,
2888 NULL,
2889 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002890};
2891
Neal Norwitzf6657e62006-12-28 04:47:50 +00002892PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002893PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002894{
2895 PyObject* m;
2896 PyObject* g;
2897 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002898
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002899 /* Initialize object types */
2900 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002901 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002902 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002903 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002904#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002905 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002906 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002907#endif
2908
Martin v. Löwis1a214512008-06-11 05:26:20 +00002909 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002910 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002911 return NULL;
2912
2913 /* The code below requires that the module gets already added
2914 to sys.modules. */
2915 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002916 _elementtreemodule.m_name,
2917 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002918
2919 /* python glue code */
2920
2921 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002922 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002923 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002924
2925 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2926
2927 bootstrap = (
2928
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002929 "from copy import copy, deepcopy\n"
2930
2931 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002932 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002933 "except ImportError:\n"
2934 " import ElementTree\n"
2935 "ET = ElementTree\n"
2936 "del ElementTree\n"
2937
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002938 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002939
2940 "try:\n" /* check if copy works as is */
2941 " copy(cElementTree.Element('x'))\n"
2942 "except:\n"
2943 " def copyelement(elem):\n"
2944 " return elem\n"
2945
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002946 "class CommentProxy:\n"
2947 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002948 " element = cElementTree.Element(ET.Comment)\n"
2949 " element.text = text\n"
2950 " return element\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002951 " def __eq__(self, other):\n"
2952 " return ET.Comment == other\n"
2953 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002954
2955 "class ElementTree(ET.ElementTree):\n" /* public */
2956 " def parse(self, source, parser=None):\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00002957 " close_source = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002958 " if not hasattr(source, 'read'):\n"
2959 " source = open(source, 'rb')\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00002960 " close_source = True\n"
2961 " try:\n"
2962 " if parser is not None:\n"
2963 " while 1:\n"
2964 " data = source.read(65536)\n"
2965 " if not data:\n"
2966 " break\n"
2967 " parser.feed(data)\n"
2968 " self._root = parser.close()\n"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002969 " else:\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00002970 " parser = cElementTree.XMLParser()\n"
2971 " self._root = parser._parse(source)\n"
2972 " return self._root\n"
2973 " finally:\n"
2974 " if close_source:\n"
2975 " source.close()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976 "cElementTree.ElementTree = ElementTree\n"
2977
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002978 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002979 " if tag == '*':\n"
2980 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002981 " if tag is None or node.tag == tag:\n"
2982 " yield node\n"
2983 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002984 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002985 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002986
2987 "def itertext(node):\n" /* helper */
2988 " if node.text:\n"
2989 " yield node.text\n"
2990 " for e in node:\n"
2991 " for s in e.itertext():\n"
2992 " yield s\n"
2993 " if e.tail:\n"
2994 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002995
2996 "def parse(source, parser=None):\n" /* public */
2997 " tree = ElementTree()\n"
2998 " tree.parse(source, parser)\n"
2999 " return tree\n"
3000 "cElementTree.parse = parse\n"
3001
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003002 "class iterparse:\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003003 " root = None\n"
3004 " def __init__(self, file, events=None):\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00003005 " self._close_file = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003006 " if not hasattr(file, 'read'):\n"
3007 " file = open(file, 'rb')\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00003008 " self._close_file = True\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003009 " self._file = file\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003010 " self._events = []\n"
3011 " self._index = 0\n"
3012 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003013 " b = cElementTree.TreeBuilder()\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003014 " self._parser = cElementTree.XMLParser(b)\n"
3015 " self._parser._setevents(self._events, events)\n"
3016 " def __next__(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003017 " while 1:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003018 " try:\n"
3019 " item = self._events[self._index]\n"
3020 " except IndexError:\n"
3021 " if self._parser is None:\n"
3022 " self.root = self._root\n"
Antoine Pitroue033e062010-10-29 10:38:18 +00003023 " if self._close_file:\n"
3024 " self._file.close()\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003025 " raise StopIteration\n"
3026 " # load event buffer\n"
3027 " del self._events[:]\n"
3028 " self._index = 0\n"
3029 " data = self._file.read(16384)\n"
3030 " if data:\n"
3031 " self._parser.feed(data)\n"
3032 " else:\n"
3033 " self._root = self._parser.close()\n"
3034 " self._parser = None\n"
3035 " else:\n"
3036 " self._index = self._index + 1\n"
3037 " return item\n"
3038 " def __iter__(self):\n"
3039 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003040 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003041
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003042 "class PIProxy:\n"
3043 " def __call__(self, target, text=None):\n"
3044 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003045 " element.text = target\n"
3046 " if text:\n"
3047 " element.text = element.text + ' ' + text\n"
3048 " return element\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003049 " def __eq__(self, other):\n"
3050 " return ET.PI == other\n"
3051 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003052
3053 "def XML(text):\n" /* public */
3054 " parser = cElementTree.XMLParser()\n"
3055 " parser.feed(text)\n"
3056 " return parser.close()\n"
3057 "cElementTree.XML = cElementTree.fromstring = XML\n"
3058
3059 "def XMLID(text):\n" /* public */
3060 " tree = XML(text)\n"
3061 " ids = {}\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003062 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003063 " id = elem.get('id')\n"
3064 " if id:\n"
3065 " ids[id] = elem\n"
3066 " return tree, ids\n"
3067 "cElementTree.XMLID = XMLID\n"
3068
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003069 "try:\n"
3070 " register_namespace = ET.register_namespace\n"
3071 "except AttributeError:\n"
3072 " def register_namespace(prefix, uri):\n"
3073 " ET._namespace_map[uri] = prefix\n"
3074 "cElementTree.register_namespace = register_namespace\n"
3075
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076 "cElementTree.dump = ET.dump\n"
3077 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3078 "cElementTree.iselement = ET.iselement\n"
3079 "cElementTree.QName = ET.QName\n"
3080 "cElementTree.tostring = ET.tostring\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003081 "cElementTree.fromstringlist = ET.fromstringlist\n"
3082 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003083 "cElementTree.VERSION = '" VERSION "'\n"
3084 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003085
3086 );
3087
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003088 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3089 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003090
3091 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3092
3093 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3094 if (elementtree_copyelement_obj) {
3095 /* reduce hack needed; enable reduce method */
3096 PyMethodDef* mp;
3097 for (mp = element_methods; mp->ml_name; mp++)
3098 if (mp->ml_meth == (PyCFunction) element_reduce) {
3099 mp->ml_name = "__reduce__";
3100 break;
3101 }
3102 } else
3103 PyErr_Clear();
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003104
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003105 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003106 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3107 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003108
3109#if defined(USE_PYEXPAT_CAPI)
3110 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003111 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3112 if (expat_capi) {
3113 /* check that it's usable */
3114 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3115 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3116 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3117 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3118 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3119 expat_capi = NULL;
3120 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003121#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003122
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003123 elementtree_parseerror_obj = PyErr_NewException(
3124 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3125 );
3126 Py_INCREF(elementtree_parseerror_obj);
3127 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3128
3129 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003130}