blob: 837164cd4b4a551e5cc589d2c394e18958d7ed22 [file] [log] [blame]
/*
 * ElementTree
 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
 *
 * elementtree accelerator
 *
 * History:
 * 1999-06-20 fl created (as part of sgmlop)
 * 2001-05-29 fl effdom edition
 * 2003-02-27 fl elementtree edition (alpha)
 * 2004-06-03 fl updates for elementtree 1.2
 * 2005-01-05 fl major optimization effort
 * 2005-01-11 fl first public release (cElementTree 0.8)
 * 2005-01-12 fl split element object into base and extras
 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
 * 2005-01-17 fl added treebuilder close method
 * 2005-01-17 fl fixed crash in getchildren
 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
 * 2005-01-28 fl added remove method (1.0.1)
 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
 * 2005-03-26 fl added Comment and PI support to XMLParser
 * 2005-03-27 fl event optimizations; complain about bogus events
 * 2005-08-08 fl fixed read error handling in parse
 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
 * 2005-12-16 fl added support for non-standard encodings
 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
 * 2006-03-12 fl merge in 2.5 ssize_t changes
 * 2007-08-25 fl call custom builder's close method from XMLParser
 * 2007-08-31 fl added iter, extend from ET 1.3
 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
 * 2007-09-03 fl fixed handling of negative insert indexes
 * 2007-09-04 fl added itertext from ET 1.3
 * 2007-09-06 fl added position attribute to ParseError exception
 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
 *
 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 */

/* Licensed to PSF under a Contributor Agreement. */
/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Keep this defined to build the expat-based XMLParser type */
#define USE_EXPAT

/* Define this to route every expat call through the expat library
   embedded in pyexpat */
/* #define USE_PYEXPAT_CAPI */

/* Number of children an element can hold without any extra memory
   allocation. */
#define STATIC_CHILDREN 4

/* Tuning note: for best performance, pick a value such that 80-90% of
   all nodes have no more than this many children.  Setting it to zero
   minimizes the size of the element structure itself (which only helps
   when there are lots of leaf nodes with attributes). */

/* Also note that pymalloc always hands out blocks in multiples of
   eight bytes.  For the current version of cElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
/* Allocation tracing hooks.  With the condition below changed to 1,
   ALLOC/RELEASE keep a running byte total and print it together with
   the given label; as shipped, both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(nbytes, label)\
do { memory += nbytes; printf("%8d - %s\n", memory, label); } while (0)
#define RELEASE(nbytes, label)\
do { memory -= nbytes; printf("%8d - %s\n", memory, label); } while (0)
#else
#define ALLOC(nbytes, label)
#define RELEASE(nbytes, label)
#endif
89
/* compiler tweaks: LOCAL(type) declares a file-local function; it is a
   plain static everywhere except under MSVC, where the function is
   additionally marked __inline and __fastcall */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
96
/* compatibility macros: fill in names that older Python headers lack */

#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check

#if !defined(Py_RETURN_NONE)
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
#endif

#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

#if (PY_VERSION_HEX < 0x02060000)
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000115
/* The lowest bit of the text and tail pointers is used as a 'join'
   flag, so every use of text or tail as an object pointer must go
   through JOIN_OBJ (see the comments in the ElementObject definition).

     JOIN_GET(ptr)        the flag bit of ptr
     JOIN_SET(ptr, flag)  ptr with its flag bit cleared, or'ed with flag
     JOIN_OBJ(ptr)        ptr with the flag bit masked off */
#define JOIN_GET(ptr) ((Py_uintptr_t) (ptr) & 1)
#define JOIN_SET(ptr, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(ptr)) | (flag)))
#define JOIN_OBJ(ptr) ((PyObject*) ((Py_uintptr_t) (ptr) & ~1))
123
124/* glue functions (see the init function for details) */
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000125static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000128static PyObject* elementtree_iter_obj;
129static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130static PyObject* elementpath_obj;
131
132/* helpers */
133
134LOCAL(PyObject*)
135deepcopy(PyObject* object, PyObject* memo)
136{
137 /* do a deep copy of the given object */
138
139 PyObject* args;
140 PyObject* result;
141
142 if (!elementtree_deepcopy_obj) {
143 PyErr_SetString(
144 PyExc_RuntimeError,
145 "deepcopy helper not found"
146 );
147 return NULL;
148 }
149
150 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000151 if (!args)
152 return NULL;
153
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
155 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156
157 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158
159 Py_DECREF(args);
160
161 return result;
162}
163
164LOCAL(PyObject*)
165list_join(PyObject* list)
166{
167 /* join list elements (destroying the list in the process) */
168
169 PyObject* joiner;
170 PyObject* function;
171 PyObject* args;
172 PyObject* result;
173
174 switch (PyList_GET_SIZE(list)) {
175 case 0:
176 Py_DECREF(list);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000177 return PyString_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000178 case 1:
179 result = PyList_GET_ITEM(list, 0);
180 Py_INCREF(result);
181 Py_DECREF(list);
182 return result;
183 }
184
185 /* two or more elements: slice out a suitable separator from the
186 first member, and use that to join the entire list */
187
188 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
189 if (!joiner)
190 return NULL;
191
192 function = PyObject_GetAttrString(joiner, "join");
193 if (!function) {
194 Py_DECREF(joiner);
195 return NULL;
196 }
197
198 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000199 if (!args)
200 return NULL;
201
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202 PyTuple_SET_ITEM(args, 0, list);
203
204 result = PyObject_CallObject(function, args);
205
206 Py_DECREF(args); /* also removes list */
207 Py_DECREF(function);
208 Py_DECREF(joiner);
209
210 return result;
211}
212
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000213/* -------------------------------------------------------------------- */
214/* the element type */
215
216typedef struct {
217
218 /* attributes (a dictionary object), or None if no attributes */
219 PyObject* attrib;
220
221 /* child elements */
222 int length; /* actual number of items */
223 int allocated; /* allocated items */
224
225 /* this either points to _children or to a malloced buffer */
226 PyObject* *children;
227
228 PyObject* _children[STATIC_CHILDREN];
229
230} ElementObjectExtra;
231
232typedef struct {
233 PyObject_HEAD
234
235 /* element tag (a string). */
236 PyObject* tag;
237
238 /* text before first child. note that this is a tagged pointer;
239 use JOIN_OBJ to get the object pointer. the join flag is used
240 to distinguish lists created by the tree builder from lists
241 assigned to the attribute by application code; the former
242 should be joined before being returned to the user, the latter
243 should be left intact. */
244 PyObject* text;
245
246 /* text after this element, in parent. note that this is a tagged
247 pointer; use JOIN_OBJ to get the object pointer. */
248 PyObject* tail;
249
250 ElementObjectExtra* extra;
251
252} ElementObject;
253
254staticforward PyTypeObject Element_Type;
255
Christian Heimese93237d2007-12-19 02:37:44 +0000256#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000257
258/* -------------------------------------------------------------------- */
259/* element constructor and destructor */
260
261LOCAL(int)
262element_new_extra(ElementObject* self, PyObject* attrib)
263{
264 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
265 if (!self->extra)
266 return -1;
267
268 if (!attrib)
269 attrib = Py_None;
270
271 Py_INCREF(attrib);
272 self->extra->attrib = attrib;
273
274 self->extra->length = 0;
275 self->extra->allocated = STATIC_CHILDREN;
276 self->extra->children = self->extra->_children;
277
278 return 0;
279}
280
281LOCAL(void)
282element_dealloc_extra(ElementObject* self)
283{
284 int i;
285
286 Py_DECREF(self->extra->attrib);
287
288 for (i = 0; i < self->extra->length; i++)
289 Py_DECREF(self->extra->children[i]);
290
291 if (self->extra->children != self->extra->_children)
292 PyObject_Free(self->extra->children);
293
294 PyObject_Free(self->extra);
295}
296
297LOCAL(PyObject*)
298element_new(PyObject* tag, PyObject* attrib)
299{
300 ElementObject* self;
301
302 self = PyObject_New(ElementObject, &Element_Type);
303 if (self == NULL)
304 return NULL;
305
306 /* use None for empty dictionaries */
307 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
308 attrib = Py_None;
309
310 self->extra = NULL;
311
312 if (attrib != Py_None) {
313
Neal Norwitzc6a989a2006-05-10 06:57:58 +0000314 if (element_new_extra(self, attrib) < 0) {
315 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000316 return NULL;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000317 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000318
319 self->extra->length = 0;
320 self->extra->allocated = STATIC_CHILDREN;
321 self->extra->children = self->extra->_children;
322
323 }
324
325 Py_INCREF(tag);
326 self->tag = tag;
327
328 Py_INCREF(Py_None);
329 self->text = Py_None;
330
331 Py_INCREF(Py_None);
332 self->tail = Py_None;
333
334 ALLOC(sizeof(ElementObject), "create element");
335
336 return (PyObject*) self;
337}
338
339LOCAL(int)
340element_resize(ElementObject* self, int extra)
341{
342 int size;
343 PyObject* *children;
344
345 /* make sure self->children can hold the given number of extra
346 elements. set an exception and return -1 if allocation failed */
347
348 if (!self->extra)
349 element_new_extra(self, NULL);
350
351 size = self->extra->length + extra;
352
353 if (size > self->extra->allocated) {
354 /* use Python 2.4's list growth strategy */
355 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000356 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
357 * which needs at least 4 bytes.
358 * Although it's a false alarm always assume at least one child to
359 * be safe.
360 */
361 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000362 if (self->extra->children != self->extra->_children) {
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000363 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
364 * "children", which needs at least 4 bytes. Although it's a
365 * false alarm always assume at least one child to be safe.
366 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000367 children = PyObject_Realloc(self->extra->children,
368 size * sizeof(PyObject*));
369 if (!children)
370 goto nomemory;
371 } else {
372 children = PyObject_Malloc(size * sizeof(PyObject*));
373 if (!children)
374 goto nomemory;
375 /* copy existing children from static area to malloc buffer */
376 memcpy(children, self->extra->children,
377 self->extra->length * sizeof(PyObject*));
378 }
379 self->extra->children = children;
380 self->extra->allocated = size;
381 }
382
383 return 0;
384
385 nomemory:
386 PyErr_NoMemory();
387 return -1;
388}
389
390LOCAL(int)
391element_add_subelement(ElementObject* self, PyObject* element)
392{
393 /* add a child element to a parent */
394
395 if (element_resize(self, 1) < 0)
396 return -1;
397
398 Py_INCREF(element);
399 self->extra->children[self->extra->length] = element;
400
401 self->extra->length++;
402
403 return 0;
404}
405
406LOCAL(PyObject*)
407element_get_attrib(ElementObject* self)
408{
409 /* return borrowed reference to attrib dictionary */
410 /* note: this function assumes that the extra section exists */
411
412 PyObject* res = self->extra->attrib;
413
414 if (res == Py_None) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000415 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000416 /* create missing dictionary */
417 res = PyDict_New();
418 if (!res)
419 return NULL;
420 self->extra->attrib = res;
421 }
422
423 return res;
424}
425
426LOCAL(PyObject*)
427element_get_text(ElementObject* self)
428{
429 /* return borrowed reference to text attribute */
430
431 PyObject* res = self->text;
432
433 if (JOIN_GET(res)) {
434 res = JOIN_OBJ(res);
435 if (PyList_CheckExact(res)) {
436 res = list_join(res);
437 if (!res)
438 return NULL;
439 self->text = res;
440 }
441 }
442
443 return res;
444}
445
446LOCAL(PyObject*)
447element_get_tail(ElementObject* self)
448{
449 /* return borrowed reference to text attribute */
450
451 PyObject* res = self->tail;
452
453 if (JOIN_GET(res)) {
454 res = JOIN_OBJ(res);
455 if (PyList_CheckExact(res)) {
456 res = list_join(res);
457 if (!res)
458 return NULL;
459 self->tail = res;
460 }
461 }
462
463 return res;
464}
465
466static PyObject*
467element(PyObject* self, PyObject* args, PyObject* kw)
468{
469 PyObject* elem;
470
471 PyObject* tag;
472 PyObject* attrib = NULL;
473 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
474 &PyDict_Type, &attrib))
475 return NULL;
476
477 if (attrib || kw) {
478 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
479 if (!attrib)
480 return NULL;
481 if (kw)
482 PyDict_Update(attrib, kw);
483 } else {
484 Py_INCREF(Py_None);
485 attrib = Py_None;
486 }
487
488 elem = element_new(tag, attrib);
489
490 Py_DECREF(attrib);
491
492 return elem;
493}
494
495static PyObject*
496subelement(PyObject* self, PyObject* args, PyObject* kw)
497{
498 PyObject* elem;
499
500 ElementObject* parent;
501 PyObject* tag;
502 PyObject* attrib = NULL;
503 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
504 &Element_Type, &parent, &tag,
505 &PyDict_Type, &attrib))
506 return NULL;
507
508 if (attrib || kw) {
509 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
510 if (!attrib)
511 return NULL;
512 if (kw)
513 PyDict_Update(attrib, kw);
514 } else {
515 Py_INCREF(Py_None);
516 attrib = Py_None;
517 }
518
519 elem = element_new(tag, attrib);
520
521 Py_DECREF(attrib);
522
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000523 if (element_add_subelement(parent, elem) < 0) {
524 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000525 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000526 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000527
528 return elem;
529}
530
531static void
532element_dealloc(ElementObject* self)
533{
534 if (self->extra)
535 element_dealloc_extra(self);
536
537 /* discard attributes */
538 Py_DECREF(self->tag);
539 Py_DECREF(JOIN_OBJ(self->text));
540 Py_DECREF(JOIN_OBJ(self->tail));
541
542 RELEASE(sizeof(ElementObject), "destroy element");
543
544 PyObject_Del(self);
545}
546
547/* -------------------------------------------------------------------- */
548/* methods (in alphabetical order) */
549
550static PyObject*
551element_append(ElementObject* self, PyObject* args)
552{
553 PyObject* element;
554 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
555 return NULL;
556
557 if (element_add_subelement(self, element) < 0)
558 return NULL;
559
560 Py_RETURN_NONE;
561}
562
563static PyObject*
564element_clear(ElementObject* self, PyObject* args)
565{
566 if (!PyArg_ParseTuple(args, ":clear"))
567 return NULL;
568
569 if (self->extra) {
570 element_dealloc_extra(self);
571 self->extra = NULL;
572 }
573
574 Py_INCREF(Py_None);
575 Py_DECREF(JOIN_OBJ(self->text));
576 self->text = Py_None;
577
578 Py_INCREF(Py_None);
579 Py_DECREF(JOIN_OBJ(self->tail));
580 self->tail = Py_None;
581
582 Py_RETURN_NONE;
583}
584
585static PyObject*
586element_copy(ElementObject* self, PyObject* args)
587{
588 int i;
589 ElementObject* element;
590
591 if (!PyArg_ParseTuple(args, ":__copy__"))
592 return NULL;
593
594 element = (ElementObject*) element_new(
595 self->tag, (self->extra) ? self->extra->attrib : Py_None
596 );
597 if (!element)
598 return NULL;
599
600 Py_DECREF(JOIN_OBJ(element->text));
601 element->text = self->text;
602 Py_INCREF(JOIN_OBJ(element->text));
603
604 Py_DECREF(JOIN_OBJ(element->tail));
605 element->tail = self->tail;
606 Py_INCREF(JOIN_OBJ(element->tail));
607
608 if (self->extra) {
609
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000610 if (element_resize(element, self->extra->length) < 0) {
611 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000612 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000613 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614
615 for (i = 0; i < self->extra->length; i++) {
616 Py_INCREF(self->extra->children[i]);
617 element->extra->children[i] = self->extra->children[i];
618 }
619
620 element->extra->length = self->extra->length;
621
622 }
623
624 return (PyObject*) element;
625}
626
627static PyObject*
628element_deepcopy(ElementObject* self, PyObject* args)
629{
630 int i;
631 ElementObject* element;
632 PyObject* tag;
633 PyObject* attrib;
634 PyObject* text;
635 PyObject* tail;
636 PyObject* id;
637
638 PyObject* memo;
639 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
640 return NULL;
641
642 tag = deepcopy(self->tag, memo);
643 if (!tag)
644 return NULL;
645
646 if (self->extra) {
647 attrib = deepcopy(self->extra->attrib, memo);
648 if (!attrib) {
649 Py_DECREF(tag);
650 return NULL;
651 }
652 } else {
653 Py_INCREF(Py_None);
654 attrib = Py_None;
655 }
656
657 element = (ElementObject*) element_new(tag, attrib);
658
659 Py_DECREF(tag);
660 Py_DECREF(attrib);
661
662 if (!element)
663 return NULL;
664
665 text = deepcopy(JOIN_OBJ(self->text), memo);
666 if (!text)
667 goto error;
668 Py_DECREF(element->text);
669 element->text = JOIN_SET(text, JOIN_GET(self->text));
670
671 tail = deepcopy(JOIN_OBJ(self->tail), memo);
672 if (!tail)
673 goto error;
674 Py_DECREF(element->tail);
675 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
676
677 if (self->extra) {
678
679 if (element_resize(element, self->extra->length) < 0)
680 goto error;
681
682 for (i = 0; i < self->extra->length; i++) {
683 PyObject* child = deepcopy(self->extra->children[i], memo);
684 if (!child) {
685 element->extra->length = i;
686 goto error;
687 }
688 element->extra->children[i] = child;
689 }
690
691 element->extra->length = self->extra->length;
692
693 }
694
695 /* add object to memo dictionary (so deepcopy won't visit it again) */
696 id = PyInt_FromLong((Py_uintptr_t) self);
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000697 if (!id)
698 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699
700 i = PyDict_SetItem(memo, id, (PyObject*) element);
701
702 Py_DECREF(id);
703
704 if (i < 0)
705 goto error;
706
707 return (PyObject*) element;
708
709 error:
710 Py_DECREF(element);
711 return NULL;
712}
713
714LOCAL(int)
715checkpath(PyObject* tag)
716{
Neal Norwitzc7074382006-06-12 02:06:17 +0000717 Py_ssize_t i;
718 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 /* check if a tag contains an xpath character */
721
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000722#define PATHCHAR(ch) \
723 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000724
725#if defined(Py_USING_UNICODE)
726 if (PyUnicode_Check(tag)) {
727 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
728 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
729 if (p[i] == '{')
730 check = 0;
731 else if (p[i] == '}')
732 check = 1;
733 else if (check && PATHCHAR(p[i]))
734 return 1;
735 }
736 return 0;
737 }
738#endif
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000739 if (PyString_Check(tag)) {
740 char *p = PyString_AS_STRING(tag);
741 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000742 if (p[i] == '{')
743 check = 0;
744 else if (p[i] == '}')
745 check = 1;
746 else if (check && PATHCHAR(p[i]))
747 return 1;
748 }
749 return 0;
750 }
751
752 return 1; /* unknown type; might be path expression */
753}
754
755static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000756element_extend(ElementObject* self, PyObject* args)
757{
758 PyObject* seq;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300759 Py_ssize_t i;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000760
761 PyObject* seq_in;
762 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
763 return NULL;
764
765 seq = PySequence_Fast(seq_in, "");
766 if (!seq) {
767 PyErr_Format(
768 PyExc_TypeError,
769 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
770 );
771 return NULL;
772 }
773
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300774 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000775 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
776 if (element_add_subelement(self, element) < 0) {
777 Py_DECREF(seq);
778 return NULL;
779 }
780 }
781
782 Py_DECREF(seq);
783
784 Py_RETURN_NONE;
785}
786
787static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000788element_find(ElementObject* self, PyObject* args)
789{
790 int i;
791
792 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000793 PyObject* namespaces = Py_None;
794 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000795 return NULL;
796
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000797 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000798 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000799 elementpath_obj, "find", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000800 );
801
802 if (!self->extra)
803 Py_RETURN_NONE;
804
805 for (i = 0; i < self->extra->length; i++) {
806 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300807 int rc;
808 if (!Element_CheckExact(item))
809 continue;
810 Py_INCREF(item);
811 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
812 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000813 return item;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300814 Py_DECREF(item);
815 if (rc < 0 && PyErr_Occurred())
816 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000817 }
818
819 Py_RETURN_NONE;
820}
821
822static PyObject*
823element_findtext(ElementObject* self, PyObject* args)
824{
825 int i;
826
827 PyObject* tag;
828 PyObject* default_value = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000829 PyObject* namespaces = Py_None;
830 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000831 return NULL;
832
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000833 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000834 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000835 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000836 );
837
838 if (!self->extra) {
839 Py_INCREF(default_value);
840 return default_value;
841 }
842
843 for (i = 0; i < self->extra->length; i++) {
844 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300845 int rc;
846 if (!Element_CheckExact(item))
847 continue;
848 Py_INCREF(item);
849 rc = PyObject_Compare(item->tag, tag);
850 if (rc == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000851 PyObject* text = element_get_text(item);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300852 if (text == Py_None) {
853 Py_DECREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000854 return PyString_FromString("");
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300855 }
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000856 Py_XINCREF(text);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300857 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000858 return text;
859 }
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300860 Py_DECREF(item);
861 if (rc < 0 && PyErr_Occurred())
862 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000863 }
864
865 Py_INCREF(default_value);
866 return default_value;
867}
868
869static PyObject*
870element_findall(ElementObject* self, PyObject* args)
871{
872 int i;
873 PyObject* out;
874
875 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000876 PyObject* namespaces = Py_None;
877 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000878 return NULL;
879
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000880 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000881 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000882 elementpath_obj, "findall", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000883 );
884
885 out = PyList_New(0);
886 if (!out)
887 return NULL;
888
889 if (!self->extra)
890 return out;
891
892 for (i = 0; i < self->extra->length; i++) {
893 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300894 int rc;
895 if (!Element_CheckExact(item))
896 continue;
897 Py_INCREF(item);
898 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
899 if (rc == 0)
900 rc = PyList_Append(out, item);
901 Py_DECREF(item);
902 if (rc < 0 && PyErr_Occurred()) {
903 Py_DECREF(out);
904 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000905 }
906 }
907
908 return out;
909}
910
911static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000912element_iterfind(ElementObject* self, PyObject* args)
913{
914 PyObject* tag;
915 PyObject* namespaces = Py_None;
916 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
917 return NULL;
918
919 return PyObject_CallMethod(
920 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
921 );
922}
923
924static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000925element_get(ElementObject* self, PyObject* args)
926{
927 PyObject* value;
928
929 PyObject* key;
930 PyObject* default_value = Py_None;
931 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
932 return NULL;
933
934 if (!self->extra || self->extra->attrib == Py_None)
935 value = default_value;
936 else {
937 value = PyDict_GetItem(self->extra->attrib, key);
938 if (!value)
939 value = default_value;
940 }
941
942 Py_INCREF(value);
943 return value;
944}
945
946static PyObject*
947element_getchildren(ElementObject* self, PyObject* args)
948{
949 int i;
950 PyObject* list;
951
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000952 /* FIXME: report as deprecated? */
953
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000954 if (!PyArg_ParseTuple(args, ":getchildren"))
955 return NULL;
956
957 if (!self->extra)
958 return PyList_New(0);
959
960 list = PyList_New(self->extra->length);
961 if (!list)
962 return NULL;
963
964 for (i = 0; i < self->extra->length; i++) {
965 PyObject* item = self->extra->children[i];
966 Py_INCREF(item);
967 PyList_SET_ITEM(list, i, item);
968 }
969
970 return list;
971}
972
973static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000974element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000975{
976 PyObject* result;
977
978 PyObject* tag = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000979 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000980 return NULL;
981
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000982 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000983 PyErr_SetString(
984 PyExc_RuntimeError,
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000985 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000986 );
987 return NULL;
988 }
989
990 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000991 if (!args)
992 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000993
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000994 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
995 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
996
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000997 result = PyObject_CallObject(elementtree_iter_obj, args);
998
999 Py_DECREF(args);
1000
1001 return result;
1002}
1003
1004
1005static PyObject*
1006element_itertext(ElementObject* self, PyObject* args)
1007{
1008 PyObject* result;
1009
1010 if (!PyArg_ParseTuple(args, ":itertext"))
1011 return NULL;
1012
1013 if (!elementtree_itertext_obj) {
1014 PyErr_SetString(
1015 PyExc_RuntimeError,
1016 "itertext helper not found"
1017 );
1018 return NULL;
1019 }
1020
1021 args = PyTuple_New(1);
1022 if (!args)
1023 return NULL;
1024
1025 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1026
1027 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001028
1029 Py_DECREF(args);
1030
1031 return result;
1032}
1033
1034static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001035element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001036{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001037 ElementObject* self = (ElementObject*) self_;
1038
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001039 if (!self->extra || index < 0 || index >= self->extra->length) {
1040 PyErr_SetString(
1041 PyExc_IndexError,
1042 "child index out of range"
1043 );
1044 return NULL;
1045 }
1046
1047 Py_INCREF(self->extra->children[index]);
1048 return self->extra->children[index];
1049}
1050
1051static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001052element_insert(ElementObject* self, PyObject* args)
1053{
1054 int i;
1055
1056 int index;
1057 PyObject* element;
1058 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1059 &Element_Type, &element))
1060 return NULL;
1061
1062 if (!self->extra)
1063 element_new_extra(self, NULL);
1064
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001065 if (index < 0) {
1066 index += self->extra->length;
1067 if (index < 0)
1068 index = 0;
1069 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001070 if (index > self->extra->length)
1071 index = self->extra->length;
1072
1073 if (element_resize(self, 1) < 0)
1074 return NULL;
1075
1076 for (i = self->extra->length; i > index; i--)
1077 self->extra->children[i] = self->extra->children[i-1];
1078
1079 Py_INCREF(element);
1080 self->extra->children[index] = element;
1081
1082 self->extra->length++;
1083
1084 Py_RETURN_NONE;
1085}
1086
1087static PyObject*
1088element_items(ElementObject* self, PyObject* args)
1089{
1090 if (!PyArg_ParseTuple(args, ":items"))
1091 return NULL;
1092
1093 if (!self->extra || self->extra->attrib == Py_None)
1094 return PyList_New(0);
1095
1096 return PyDict_Items(self->extra->attrib);
1097}
1098
1099static PyObject*
1100element_keys(ElementObject* self, PyObject* args)
1101{
1102 if (!PyArg_ParseTuple(args, ":keys"))
1103 return NULL;
1104
1105 if (!self->extra || self->extra->attrib == Py_None)
1106 return PyList_New(0);
1107
1108 return PyDict_Keys(self->extra->attrib);
1109}
1110
Martin v. Löwis18e16552006-02-15 17:27:45 +00001111static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001112element_length(ElementObject* self)
1113{
1114 if (!self->extra)
1115 return 0;
1116
1117 return self->extra->length;
1118}
1119
1120static PyObject*
1121element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1122{
1123 PyObject* elem;
1124
1125 PyObject* tag;
1126 PyObject* attrib;
1127 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1128 return NULL;
1129
1130 attrib = PyDict_Copy(attrib);
1131 if (!attrib)
1132 return NULL;
1133
1134 elem = element_new(tag, attrib);
1135
1136 Py_DECREF(attrib);
1137
1138 return elem;
1139}
1140
1141static PyObject*
1142element_reduce(ElementObject* self, PyObject* args)
1143{
1144 if (!PyArg_ParseTuple(args, ":__reduce__"))
1145 return NULL;
1146
1147 /* Hack alert: This method is used to work around a __copy__
1148 problem on certain 2.3 and 2.4 versions. To save time and
1149 simplify the code, we create the copy in here, and use a dummy
1150 copyelement helper to trick the copy module into doing the
1151 right thing. */
1152
1153 if (!elementtree_copyelement_obj) {
1154 PyErr_SetString(
1155 PyExc_RuntimeError,
1156 "copyelement helper not found"
1157 );
1158 return NULL;
1159 }
1160
1161 return Py_BuildValue(
1162 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1163 );
1164}
1165
1166static PyObject*
1167element_remove(ElementObject* self, PyObject* args)
1168{
1169 int i;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001170 int rc;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001171 PyObject* element;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001172 PyObject* found;
1173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001174 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1175 return NULL;
1176
1177 if (!self->extra) {
1178 /* element has no children, so raise exception */
1179 PyErr_SetString(
1180 PyExc_ValueError,
1181 "list.remove(x): x not in list"
1182 );
1183 return NULL;
1184 }
1185
1186 for (i = 0; i < self->extra->length; i++) {
1187 if (self->extra->children[i] == element)
1188 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001189 rc = PyObject_Compare(self->extra->children[i], element);
1190 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001191 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001192 if (rc < 0 && PyErr_Occurred())
1193 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001194 }
1195
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001196 if (i >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001197 /* element is not in children, so raise exception */
1198 PyErr_SetString(
1199 PyExc_ValueError,
1200 "list.remove(x): x not in list"
1201 );
1202 return NULL;
1203 }
1204
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001205 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001206
1207 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001208 for (; i < self->extra->length; i++)
1209 self->extra->children[i] = self->extra->children[i+1];
1210
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001211 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001212 Py_RETURN_NONE;
1213}
1214
1215static PyObject*
1216element_repr(ElementObject* self)
1217{
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001218 PyObject *repr, *tag;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001220 tag = PyObject_Repr(self->tag);
1221 if (!tag)
1222 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001223
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001224 repr = PyString_FromFormat("<Element %s at %p>",
1225 PyString_AS_STRING(tag), self);
1226
1227 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001228
1229 return repr;
1230}
1231
1232static PyObject*
1233element_set(ElementObject* self, PyObject* args)
1234{
1235 PyObject* attrib;
1236
1237 PyObject* key;
1238 PyObject* value;
1239 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1240 return NULL;
1241
1242 if (!self->extra)
1243 element_new_extra(self, NULL);
1244
1245 attrib = element_get_attrib(self);
1246 if (!attrib)
1247 return NULL;
1248
1249 if (PyDict_SetItem(attrib, key, value) < 0)
1250 return NULL;
1251
1252 Py_RETURN_NONE;
1253}
1254
1255static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001256element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001257{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001258 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001259 int i;
1260 PyObject* old;
1261
1262 if (!self->extra || index < 0 || index >= self->extra->length) {
1263 PyErr_SetString(
1264 PyExc_IndexError,
1265 "child assignment index out of range");
1266 return -1;
1267 }
1268
1269 old = self->extra->children[index];
1270
1271 if (item) {
1272 Py_INCREF(item);
1273 self->extra->children[index] = item;
1274 } else {
1275 self->extra->length--;
1276 for (i = index; i < self->extra->length; i++)
1277 self->extra->children[i] = self->extra->children[i+1];
1278 }
1279
1280 Py_DECREF(old);
1281
1282 return 0;
1283}
1284
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001285static PyObject*
1286element_subscr(PyObject* self_, PyObject* item)
1287{
1288 ElementObject* self = (ElementObject*) self_;
1289
1290#if (PY_VERSION_HEX < 0x02050000)
1291 if (PyInt_Check(item) || PyLong_Check(item)) {
1292 long i = PyInt_AsLong(item);
1293#else
1294 if (PyIndex_Check(item)) {
1295 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1296#endif
1297
1298 if (i == -1 && PyErr_Occurred()) {
1299 return NULL;
1300 }
1301 if (i < 0 && self->extra)
1302 i += self->extra->length;
1303 return element_getitem(self_, i);
1304 }
1305 else if (PySlice_Check(item)) {
1306 Py_ssize_t start, stop, step, slicelen, cur, i;
1307 PyObject* list;
1308
1309 if (!self->extra)
1310 return PyList_New(0);
1311
1312 if (PySlice_GetIndicesEx((PySliceObject *)item,
1313 self->extra->length,
1314 &start, &stop, &step, &slicelen) < 0) {
1315 return NULL;
1316 }
1317
1318 if (slicelen <= 0)
1319 return PyList_New(0);
1320 else {
1321 list = PyList_New(slicelen);
1322 if (!list)
1323 return NULL;
1324
1325 for (cur = start, i = 0; i < slicelen;
1326 cur += step, i++) {
1327 PyObject* item = self->extra->children[cur];
1328 Py_INCREF(item);
1329 PyList_SET_ITEM(list, i, item);
1330 }
1331
1332 return list;
1333 }
1334 }
1335 else {
1336 PyErr_SetString(PyExc_TypeError,
1337 "element indices must be integers");
1338 return NULL;
1339 }
1340}
1341
1342static int
1343element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1344{
1345 ElementObject* self = (ElementObject*) self_;
1346
1347#if (PY_VERSION_HEX < 0x02050000)
1348 if (PyInt_Check(item) || PyLong_Check(item)) {
1349 long i = PyInt_AsLong(item);
1350#else
1351 if (PyIndex_Check(item)) {
1352 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1353#endif
1354
1355 if (i == -1 && PyErr_Occurred()) {
1356 return -1;
1357 }
1358 if (i < 0 && self->extra)
1359 i += self->extra->length;
1360 return element_setitem(self_, i, value);
1361 }
1362 else if (PySlice_Check(item)) {
1363 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1364
1365 PyObject* recycle = NULL;
1366 PyObject* seq = NULL;
1367
1368 if (!self->extra)
1369 element_new_extra(self, NULL);
1370
1371 if (PySlice_GetIndicesEx((PySliceObject *)item,
1372 self->extra->length,
1373 &start, &stop, &step, &slicelen) < 0) {
1374 return -1;
1375 }
1376
1377 if (value == NULL)
1378 newlen = 0;
1379 else {
1380 seq = PySequence_Fast(value, "");
1381 if (!seq) {
1382 PyErr_Format(
1383 PyExc_TypeError,
1384 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1385 );
1386 return -1;
1387 }
1388 newlen = PySequence_Size(seq);
1389 }
1390
1391 if (step != 1 && newlen != slicelen)
1392 {
1393 PyErr_Format(PyExc_ValueError,
1394#if (PY_VERSION_HEX < 0x02050000)
1395 "attempt to assign sequence of size %d "
1396 "to extended slice of size %d",
1397#else
1398 "attempt to assign sequence of size %zd "
1399 "to extended slice of size %zd",
1400#endif
1401 newlen, slicelen
1402 );
1403 return -1;
1404 }
1405
1406
1407 /* Resize before creating the recycle bin, to prevent refleaks. */
1408 if (newlen > slicelen) {
1409 if (element_resize(self, newlen - slicelen) < 0) {
1410 if (seq) {
1411 Py_DECREF(seq);
1412 }
1413 return -1;
1414 }
1415 }
1416
1417 if (slicelen > 0) {
1418 /* to avoid recursive calls to this method (via decref), move
1419 old items to the recycle bin here, and get rid of them when
1420 we're done modifying the element */
1421 recycle = PyList_New(slicelen);
1422 if (!recycle) {
1423 if (seq) {
1424 Py_DECREF(seq);
1425 }
1426 return -1;
1427 }
1428 for (cur = start, i = 0; i < slicelen;
1429 cur += step, i++)
1430 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1431 }
1432
1433 if (newlen < slicelen) {
1434 /* delete slice */
1435 for (i = stop; i < self->extra->length; i++)
1436 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1437 } else if (newlen > slicelen) {
1438 /* insert slice */
1439 for (i = self->extra->length-1; i >= stop; i--)
1440 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1441 }
1442
1443 /* replace the slice */
1444 for (cur = start, i = 0; i < newlen;
1445 cur += step, i++) {
1446 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1447 Py_INCREF(element);
1448 self->extra->children[cur] = element;
1449 }
1450
1451 self->extra->length += newlen - slicelen;
1452
1453 if (seq) {
1454 Py_DECREF(seq);
1455 }
1456
1457 /* discard the recycle bin, and everything in it */
1458 Py_XDECREF(recycle);
1459
1460 return 0;
1461 }
1462 else {
1463 PyErr_SetString(PyExc_TypeError,
1464 "element indices must be integers");
1465 return -1;
1466 }
1467}
1468
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001469static PyMethodDef element_methods[] = {
1470
1471 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1472
1473 {"get", (PyCFunction) element_get, METH_VARARGS},
1474 {"set", (PyCFunction) element_set, METH_VARARGS},
1475
1476 {"find", (PyCFunction) element_find, METH_VARARGS},
1477 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1478 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1479
1480 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001481 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001482 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1483 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1484
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001485 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1486 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1487 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1488
1489 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001490 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1491
1492 {"items", (PyCFunction) element_items, METH_VARARGS},
1493 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1494
1495 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1496
1497 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1498 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1499
1500 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1501 C objects correctly, so we have to fake it using a __reduce__-
1502 based hack (see the element_reduce implementation above for
1503 details). */
1504
1505 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1506 using a runtime test to figure out if we need to fake things
1507 or now (see the init code below). The following entry is
1508 enabled only if the hack is needed. */
1509
1510 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1511
1512 {NULL, NULL}
1513};
1514
1515static PyObject*
1516element_getattr(ElementObject* self, char* name)
1517{
1518 PyObject* res;
1519
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001520 /* handle common attributes first */
1521 if (strcmp(name, "tag") == 0) {
1522 res = self->tag;
1523 Py_INCREF(res);
1524 return res;
1525 } else if (strcmp(name, "text") == 0) {
1526 res = element_get_text(self);
1527 Py_INCREF(res);
1528 return res;
1529 }
1530
1531 /* methods */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001532 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1533 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001534 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001535
1536 PyErr_Clear();
1537
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001538 /* less common attributes */
1539 if (strcmp(name, "tail") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001540 res = element_get_tail(self);
1541 } else if (strcmp(name, "attrib") == 0) {
1542 if (!self->extra)
1543 element_new_extra(self, NULL);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001544 res = element_get_attrib(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001545 } else {
1546 PyErr_SetString(PyExc_AttributeError, name);
1547 return NULL;
1548 }
1549
1550 if (!res)
1551 return NULL;
1552
1553 Py_INCREF(res);
1554 return res;
1555}
1556
1557static int
1558element_setattr(ElementObject* self, const char* name, PyObject* value)
1559{
1560 if (value == NULL) {
1561 PyErr_SetString(
1562 PyExc_AttributeError,
1563 "can't delete element attributes"
1564 );
1565 return -1;
1566 }
1567
1568 if (strcmp(name, "tag") == 0) {
1569 Py_DECREF(self->tag);
1570 self->tag = value;
1571 Py_INCREF(self->tag);
1572 } else if (strcmp(name, "text") == 0) {
1573 Py_DECREF(JOIN_OBJ(self->text));
1574 self->text = value;
1575 Py_INCREF(self->text);
1576 } else if (strcmp(name, "tail") == 0) {
1577 Py_DECREF(JOIN_OBJ(self->tail));
1578 self->tail = value;
1579 Py_INCREF(self->tail);
1580 } else if (strcmp(name, "attrib") == 0) {
1581 if (!self->extra)
1582 element_new_extra(self, NULL);
1583 Py_DECREF(self->extra->attrib);
1584 self->extra->attrib = value;
1585 Py_INCREF(self->extra->attrib);
1586 } else {
1587 PyErr_SetString(PyExc_AttributeError, name);
1588 return -1;
1589 }
1590
1591 return 0;
1592}
1593
1594static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001595 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001596 0, /* sq_concat */
1597 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001598 element_getitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001599 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001600 element_setitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001601 0,
1602};
1603
1604static PyMappingMethods element_as_mapping = {
1605 (lenfunc) element_length,
1606 (binaryfunc) element_subscr,
1607 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001608};
1609
1610statichere PyTypeObject Element_Type = {
1611 PyObject_HEAD_INIT(NULL)
1612 0, "Element", sizeof(ElementObject), 0,
1613 /* methods */
1614 (destructor)element_dealloc, /* tp_dealloc */
1615 0, /* tp_print */
1616 (getattrfunc)element_getattr, /* tp_getattr */
1617 (setattrfunc)element_setattr, /* tp_setattr */
1618 0, /* tp_compare */
1619 (reprfunc)element_repr, /* tp_repr */
1620 0, /* tp_as_number */
1621 &element_as_sequence, /* tp_as_sequence */
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001622 &element_as_mapping, /* tp_as_mapping */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001623};
1624
1625/* ==================================================================== */
1626/* the tree builder type */
1627
1628typedef struct {
1629 PyObject_HEAD
1630
1631 PyObject* root; /* root node (first created node) */
1632
1633 ElementObject* this; /* current node */
1634 ElementObject* last; /* most recently created node */
1635
1636 PyObject* data; /* data collector (string or list), or NULL */
1637
1638 PyObject* stack; /* element stack */
Neal Norwitzc7074382006-06-12 02:06:17 +00001639 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001640
1641 /* element tracing */
1642 PyObject* events; /* list of events, or NULL if not collecting */
1643 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1644 PyObject* end_event_obj;
1645 PyObject* start_ns_event_obj;
1646 PyObject* end_ns_event_obj;
1647
1648} TreeBuilderObject;
1649
1650staticforward PyTypeObject TreeBuilder_Type;
1651
Christian Heimese93237d2007-12-19 02:37:44 +00001652#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001653
1654/* -------------------------------------------------------------------- */
1655/* constructor and destructor */
1656
1657LOCAL(PyObject*)
1658treebuilder_new(void)
1659{
1660 TreeBuilderObject* self;
1661
1662 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1663 if (self == NULL)
1664 return NULL;
1665
1666 self->root = NULL;
1667
1668 Py_INCREF(Py_None);
1669 self->this = (ElementObject*) Py_None;
1670
1671 Py_INCREF(Py_None);
1672 self->last = (ElementObject*) Py_None;
1673
1674 self->data = NULL;
1675
1676 self->stack = PyList_New(20);
1677 self->index = 0;
1678
1679 self->events = NULL;
1680 self->start_event_obj = self->end_event_obj = NULL;
1681 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1682
1683 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1684
1685 return (PyObject*) self;
1686}
1687
1688static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00001689treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001690{
1691 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1692 return NULL;
1693
1694 return treebuilder_new();
1695}
1696
1697static void
1698treebuilder_dealloc(TreeBuilderObject* self)
1699{
1700 Py_XDECREF(self->end_ns_event_obj);
1701 Py_XDECREF(self->start_ns_event_obj);
1702 Py_XDECREF(self->end_event_obj);
1703 Py_XDECREF(self->start_event_obj);
1704 Py_XDECREF(self->events);
1705 Py_DECREF(self->stack);
1706 Py_XDECREF(self->data);
1707 Py_DECREF(self->last);
1708 Py_DECREF(self->this);
1709 Py_XDECREF(self->root);
1710
1711 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1712
1713 PyObject_Del(self);
1714}
1715
1716/* -------------------------------------------------------------------- */
1717/* handlers */
1718
1719LOCAL(PyObject*)
1720treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1721 PyObject* standalone)
1722{
1723 Py_RETURN_NONE;
1724}
1725
1726LOCAL(PyObject*)
1727treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1728 PyObject* attrib)
1729{
1730 PyObject* node;
1731 PyObject* this;
1732
1733 if (self->data) {
1734 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001735 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001736 self->last->text = JOIN_SET(
1737 self->data, PyList_CheckExact(self->data)
1738 );
1739 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001740 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001741 self->last->tail = JOIN_SET(
1742 self->data, PyList_CheckExact(self->data)
1743 );
1744 }
1745 self->data = NULL;
1746 }
1747
1748 node = element_new(tag, attrib);
1749 if (!node)
1750 return NULL;
1751
1752 this = (PyObject*) self->this;
1753
1754 if (this != Py_None) {
1755 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001756 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001757 } else {
1758 if (self->root) {
1759 PyErr_SetString(
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001760 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001761 "multiple elements on top level"
1762 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001763 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001764 }
1765 Py_INCREF(node);
1766 self->root = node;
1767 }
1768
1769 if (self->index < PyList_GET_SIZE(self->stack)) {
1770 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001771 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001772 Py_INCREF(this);
1773 } else {
1774 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001775 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001776 }
1777 self->index++;
1778
1779 Py_DECREF(this);
1780 Py_INCREF(node);
1781 self->this = (ElementObject*) node;
1782
1783 Py_DECREF(self->last);
1784 Py_INCREF(node);
1785 self->last = (ElementObject*) node;
1786
1787 if (self->start_event_obj) {
1788 PyObject* res;
1789 PyObject* action = self->start_event_obj;
1790 res = PyTuple_New(2);
1791 if (res) {
1792 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1793 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1794 PyList_Append(self->events, res);
1795 Py_DECREF(res);
1796 } else
1797 PyErr_Clear(); /* FIXME: propagate error */
1798 }
1799
1800 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001801
1802 error:
1803 Py_DECREF(node);
1804 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001805}
1806
1807LOCAL(PyObject*)
1808treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1809{
1810 if (!self->data) {
Fredrik Lundhdc075b92006-08-16 16:47:07 +00001811 if (self->last == (ElementObject*) Py_None) {
1812 /* ignore calls to data before the first call to start */
1813 Py_RETURN_NONE;
1814 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001815 /* store the first item as is */
1816 Py_INCREF(data); self->data = data;
1817 } else {
1818 /* more than one item; use a list to collect items */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001819 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1820 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001821 /* expat often generates single character data sections; handle
1822 the most common case by resizing the existing string... */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001823 Py_ssize_t size = PyString_GET_SIZE(self->data);
1824 if (_PyString_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001825 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001826 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001827 } else if (PyList_CheckExact(self->data)) {
1828 if (PyList_Append(self->data, data) < 0)
1829 return NULL;
1830 } else {
1831 PyObject* list = PyList_New(2);
1832 if (!list)
1833 return NULL;
1834 PyList_SET_ITEM(list, 0, self->data);
1835 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1836 self->data = list;
1837 }
1838 }
1839
1840 Py_RETURN_NONE;
1841}
1842
1843LOCAL(PyObject*)
1844treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1845{
1846 PyObject* item;
1847
1848 if (self->data) {
1849 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001850 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001851 self->last->text = JOIN_SET(
1852 self->data, PyList_CheckExact(self->data)
1853 );
1854 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001855 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001856 self->last->tail = JOIN_SET(
1857 self->data, PyList_CheckExact(self->data)
1858 );
1859 }
1860 self->data = NULL;
1861 }
1862
1863 if (self->index == 0) {
1864 PyErr_SetString(
1865 PyExc_IndexError,
1866 "pop from empty stack"
1867 );
1868 return NULL;
1869 }
1870
1871 self->index--;
1872
1873 item = PyList_GET_ITEM(self->stack, self->index);
1874 Py_INCREF(item);
1875
1876 Py_DECREF(self->last);
1877
1878 self->last = (ElementObject*) self->this;
1879 self->this = (ElementObject*) item;
1880
1881 if (self->end_event_obj) {
1882 PyObject* res;
1883 PyObject* action = self->end_event_obj;
1884 PyObject* node = (PyObject*) self->last;
1885 res = PyTuple_New(2);
1886 if (res) {
1887 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1888 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1889 PyList_Append(self->events, res);
1890 Py_DECREF(res);
1891 } else
1892 PyErr_Clear(); /* FIXME: propagate error */
1893 }
1894
1895 Py_INCREF(self->last);
1896 return (PyObject*) self->last;
1897}
1898
1899LOCAL(void)
1900treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001901 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001902{
1903 PyObject* res;
1904 PyObject* action;
1905 PyObject* parcel;
1906
1907 if (!self->events)
1908 return;
1909
1910 if (start) {
1911 if (!self->start_ns_event_obj)
1912 return;
1913 action = self->start_ns_event_obj;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001914 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001915 if (!parcel)
1916 return;
1917 Py_INCREF(action);
1918 } else {
1919 if (!self->end_ns_event_obj)
1920 return;
1921 action = self->end_ns_event_obj;
1922 Py_INCREF(action);
1923 parcel = Py_None;
1924 Py_INCREF(parcel);
1925 }
1926
1927 res = PyTuple_New(2);
1928
1929 if (res) {
1930 PyTuple_SET_ITEM(res, 0, action);
1931 PyTuple_SET_ITEM(res, 1, parcel);
1932 PyList_Append(self->events, res);
1933 Py_DECREF(res);
1934 } else
1935 PyErr_Clear(); /* FIXME: propagate error */
1936}
1937
1938/* -------------------------------------------------------------------- */
1939/* methods (in alphabetical order) */
1940
1941static PyObject*
1942treebuilder_data(TreeBuilderObject* self, PyObject* args)
1943{
1944 PyObject* data;
1945 if (!PyArg_ParseTuple(args, "O:data", &data))
1946 return NULL;
1947
1948 return treebuilder_handle_data(self, data);
1949}
1950
1951static PyObject*
1952treebuilder_end(TreeBuilderObject* self, PyObject* args)
1953{
1954 PyObject* tag;
1955 if (!PyArg_ParseTuple(args, "O:end", &tag))
1956 return NULL;
1957
1958 return treebuilder_handle_end(self, tag);
1959}
1960
1961LOCAL(PyObject*)
1962treebuilder_done(TreeBuilderObject* self)
1963{
1964 PyObject* res;
1965
1966 /* FIXME: check stack size? */
1967
1968 if (self->root)
1969 res = self->root;
1970 else
1971 res = Py_None;
1972
1973 Py_INCREF(res);
1974 return res;
1975}
1976
1977static PyObject*
1978treebuilder_close(TreeBuilderObject* self, PyObject* args)
1979{
1980 if (!PyArg_ParseTuple(args, ":close"))
1981 return NULL;
1982
1983 return treebuilder_done(self);
1984}
1985
1986static PyObject*
1987treebuilder_start(TreeBuilderObject* self, PyObject* args)
1988{
1989 PyObject* tag;
1990 PyObject* attrib = Py_None;
1991 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1992 return NULL;
1993
1994 return treebuilder_handle_start(self, tag, attrib);
1995}
1996
1997static PyObject*
1998treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1999{
2000 PyObject* encoding;
2001 PyObject* standalone;
2002 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
2003 return NULL;
2004
2005 return treebuilder_handle_xml(self, encoding, standalone);
2006}
2007
2008static PyMethodDef treebuilder_methods[] = {
2009 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2010 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2011 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
2012 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
2013 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2014 {NULL, NULL}
2015};
2016
2017static PyObject*
2018treebuilder_getattr(TreeBuilderObject* self, char* name)
2019{
2020 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
2021}
2022
2023statichere PyTypeObject TreeBuilder_Type = {
2024 PyObject_HEAD_INIT(NULL)
2025 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
2026 /* methods */
2027 (destructor)treebuilder_dealloc, /* tp_dealloc */
2028 0, /* tp_print */
2029 (getattrfunc)treebuilder_getattr, /* tp_getattr */
2030};
2031
2032/* ==================================================================== */
2033/* the expat interface */
2034
2035#if defined(USE_EXPAT)
2036
2037#include "expat.h"
2038
2039#if defined(USE_PYEXPAT_CAPI)
2040#include "pyexpat.h"
2041static struct PyExpat_CAPI* expat_capi;
2042#define EXPAT(func) (expat_capi->func)
2043#else
2044#define EXPAT(func) (XML_##func)
2045#endif
2046
2047typedef struct {
2048 PyObject_HEAD
2049
2050 XML_Parser parser;
2051
2052 PyObject* target;
2053 PyObject* entity;
2054
2055 PyObject* names;
2056
2057 PyObject* handle_xml;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002058
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002059 PyObject* handle_start;
2060 PyObject* handle_data;
2061 PyObject* handle_end;
2062
2063 PyObject* handle_comment;
2064 PyObject* handle_pi;
2065
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002066 PyObject* handle_close;
2067
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002068} XMLParserObject;
2069
2070staticforward PyTypeObject XMLParser_Type;
2071
2072/* helpers */
2073
#if defined(Py_USING_UNICODE)
/* Return 1 if any of the first 'size' bytes of 'string' has its high
   bit set (i.e. the data is not plain 7-bit), 0 otherwise. */
LOCAL(int)
checkstring(const char* string, int size)
{
    int remaining;

    /* check if an 8-bit string contains UTF-8 characters */
    for (remaining = size; remaining > 0; remaining--, string++) {
        if (*string & 0x80)
            return 1;
    }

    return 0;
}
#endif
2088
2089LOCAL(PyObject*)
2090makestring(const char* string, int size)
2091{
2092 /* convert a UTF-8 string to either a 7-bit ascii string or a
2093 Unicode string */
2094
2095#if defined(Py_USING_UNICODE)
2096 if (checkstring(string, size))
2097 return PyUnicode_DecodeUTF8(string, size, "strict");
2098#endif
2099
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002100 return PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002101}
2102
2103LOCAL(PyObject*)
2104makeuniversal(XMLParserObject* self, const char* string)
2105{
2106 /* convert a UTF-8 tag/attribute name from the expat parser
2107 to a universal name string */
2108
2109 int size = strlen(string);
2110 PyObject* key;
2111 PyObject* value;
2112
2113 /* look the 'raw' name up in the names dictionary */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002114 key = PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002115 if (!key)
2116 return NULL;
2117
2118 value = PyDict_GetItem(self->names, key);
2119
2120 if (value) {
2121 Py_INCREF(value);
2122 } else {
2123 /* new name. convert to universal name, and decode as
2124 necessary */
2125
2126 PyObject* tag;
2127 char* p;
2128 int i;
2129
2130 /* look for namespace separator */
2131 for (i = 0; i < size; i++)
2132 if (string[i] == '}')
2133 break;
2134 if (i != size) {
2135 /* convert to universal name */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002136 tag = PyString_FromStringAndSize(NULL, size+1);
2137 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002138 p[0] = '{';
2139 memcpy(p+1, string, size);
2140 size++;
2141 } else {
2142 /* plain name; use key as tag */
2143 Py_INCREF(key);
2144 tag = key;
2145 }
2146
2147 /* decode universal name */
2148#if defined(Py_USING_UNICODE)
2149 /* inline makestring, to avoid duplicating the source string if
2150 it's not an utf-8 string */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002151 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002152 if (checkstring(p, size)) {
2153 value = PyUnicode_DecodeUTF8(p, size, "strict");
2154 Py_DECREF(tag);
2155 if (!value) {
2156 Py_DECREF(key);
2157 return NULL;
2158 }
2159 } else
2160#endif
2161 value = tag; /* use tag as is */
2162
2163 /* add to names dictionary */
2164 if (PyDict_SetItem(self->names, key, value) < 0) {
2165 Py_DECREF(key);
2166 Py_DECREF(value);
2167 return NULL;
2168 }
2169 }
2170
2171 Py_DECREF(key);
2172 return value;
2173}
2174
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002175static void
2176expat_set_error(const char* message, int line, int column)
2177{
2178 PyObject *error;
2179 PyObject *position;
2180 char buffer[256];
2181
2182 sprintf(buffer, "%s: line %d, column %d", message, line, column);
2183
2184 error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
2185 if (!error)
2186 return;
2187
2188 /* add position attribute */
2189 position = Py_BuildValue("(ii)", line, column);
2190 if (!position) {
2191 Py_DECREF(error);
2192 return;
2193 }
2194 if (PyObject_SetAttrString(error, "position", position) == -1) {
2195 Py_DECREF(error);
2196 Py_DECREF(position);
2197 return;
2198 }
2199 Py_DECREF(position);
2200
2201 PyErr_SetObject(elementtree_parseerror_obj, error);
2202 Py_DECREF(error);
2203}
2204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002205/* -------------------------------------------------------------------- */
2206/* handlers */
2207
2208static void
2209expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2210 int data_len)
2211{
2212 PyObject* key;
2213 PyObject* value;
2214 PyObject* res;
2215
2216 if (data_len < 2 || data_in[0] != '&')
2217 return;
2218
2219 key = makestring(data_in + 1, data_len - 2);
2220 if (!key)
2221 return;
2222
2223 value = PyDict_GetItem(self->entity, key);
2224
2225 if (value) {
2226 if (TreeBuilder_CheckExact(self->target))
2227 res = treebuilder_handle_data(
2228 (TreeBuilderObject*) self->target, value
2229 );
2230 else if (self->handle_data)
2231 res = PyObject_CallFunction(self->handle_data, "O", value);
2232 else
2233 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002234 Py_XDECREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002235 } else if (!PyErr_Occurred()) {
2236 /* Report the first error, not the last */
2237 char message[128];
2238 sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key));
2239 expat_set_error(
2240 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002241 EXPAT(GetErrorLineNumber)(self->parser),
2242 EXPAT(GetErrorColumnNumber)(self->parser)
2243 );
2244 }
2245
2246 Py_DECREF(key);
2247}
2248
2249static void
2250expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2251 const XML_Char **attrib_in)
2252{
2253 PyObject* res;
2254 PyObject* tag;
2255 PyObject* attrib;
2256 int ok;
2257
2258 /* tag name */
2259 tag = makeuniversal(self, tag_in);
2260 if (!tag)
2261 return; /* parser will look for errors */
2262
2263 /* attributes */
2264 if (attrib_in[0]) {
2265 attrib = PyDict_New();
2266 if (!attrib)
2267 return;
2268 while (attrib_in[0] && attrib_in[1]) {
2269 PyObject* key = makeuniversal(self, attrib_in[0]);
2270 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2271 if (!key || !value) {
2272 Py_XDECREF(value);
2273 Py_XDECREF(key);
2274 Py_DECREF(attrib);
2275 return;
2276 }
2277 ok = PyDict_SetItem(attrib, key, value);
2278 Py_DECREF(value);
2279 Py_DECREF(key);
2280 if (ok < 0) {
2281 Py_DECREF(attrib);
2282 return;
2283 }
2284 attrib_in += 2;
2285 }
2286 } else {
2287 Py_INCREF(Py_None);
2288 attrib = Py_None;
2289 }
2290
2291 if (TreeBuilder_CheckExact(self->target))
2292 /* shortcut */
2293 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2294 tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002295 else if (self->handle_start) {
2296 if (attrib == Py_None) {
2297 Py_DECREF(attrib);
2298 attrib = PyDict_New();
2299 if (!attrib)
2300 return;
2301 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002302 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002303 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002304 res = NULL;
2305
2306 Py_DECREF(tag);
2307 Py_DECREF(attrib);
2308
2309 Py_XDECREF(res);
2310}
2311
2312static void
2313expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2314 int data_len)
2315{
2316 PyObject* data;
2317 PyObject* res;
2318
2319 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002320 if (!data)
2321 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002322
2323 if (TreeBuilder_CheckExact(self->target))
2324 /* shortcut */
2325 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2326 else if (self->handle_data)
2327 res = PyObject_CallFunction(self->handle_data, "O", data);
2328 else
2329 res = NULL;
2330
2331 Py_DECREF(data);
2332
2333 Py_XDECREF(res);
2334}
2335
2336static void
2337expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2338{
2339 PyObject* tag;
2340 PyObject* res = NULL;
2341
2342 if (TreeBuilder_CheckExact(self->target))
2343 /* shortcut */
2344 /* the standard tree builder doesn't look at the end tag */
2345 res = treebuilder_handle_end(
2346 (TreeBuilderObject*) self->target, Py_None
2347 );
2348 else if (self->handle_end) {
2349 tag = makeuniversal(self, tag_in);
2350 if (tag) {
2351 res = PyObject_CallFunction(self->handle_end, "O", tag);
2352 Py_DECREF(tag);
2353 }
2354 }
2355
2356 Py_XDECREF(res);
2357}
2358
2359static void
2360expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2361 const XML_Char *uri)
2362{
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002363 PyObject* sprefix = NULL;
2364 PyObject* suri = NULL;
2365
Eli Benderskyf933e082013-11-28 06:25:45 -08002366 if (uri)
Eli Bendersky71142c42013-11-28 06:37:25 -08002367 suri = makestring(uri, strlen(uri));
Eli Benderskyf933e082013-11-28 06:25:45 -08002368 else
Eli Bendersky71142c42013-11-28 06:37:25 -08002369 suri = PyString_FromStringAndSize("", 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002370 if (!suri)
2371 return;
2372
2373 if (prefix)
2374 sprefix = makestring(prefix, strlen(prefix));
2375 else
2376 sprefix = PyString_FromStringAndSize("", 0);
2377 if (!sprefix) {
2378 Py_DECREF(suri);
2379 return;
2380 }
2381
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002382 treebuilder_handle_namespace(
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002383 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002384 );
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002385
2386 Py_DECREF(sprefix);
2387 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002388}
2389
2390static void
2391expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2392{
2393 treebuilder_handle_namespace(
2394 (TreeBuilderObject*) self->target, 0, NULL, NULL
2395 );
2396}
2397
2398static void
2399expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2400{
2401 PyObject* comment;
2402 PyObject* res;
2403
2404 if (self->handle_comment) {
2405 comment = makestring(comment_in, strlen(comment_in));
2406 if (comment) {
2407 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2408 Py_XDECREF(res);
2409 Py_DECREF(comment);
2410 }
2411 }
2412}
2413
2414static void
2415expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2416 const XML_Char* data_in)
2417{
2418 PyObject* target;
2419 PyObject* data;
2420 PyObject* res;
2421
2422 if (self->handle_pi) {
2423 target = makestring(target_in, strlen(target_in));
2424 data = makestring(data_in, strlen(data_in));
2425 if (target && data) {
2426 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2427 Py_XDECREF(res);
2428 Py_DECREF(data);
2429 Py_DECREF(target);
2430 } else {
2431 Py_XDECREF(data);
2432 Py_XDECREF(target);
2433 }
2434 }
2435}
2436
2437#if defined(Py_USING_UNICODE)
2438static int
2439expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2440 XML_Encoding *info)
2441{
2442 PyObject* u;
2443 Py_UNICODE* p;
2444 unsigned char s[256];
2445 int i;
2446
2447 memset(info, 0, sizeof(XML_Encoding));
2448
2449 for (i = 0; i < 256; i++)
2450 s[i] = i;
2451
Fredrik Lundhc3389992005-12-25 11:40:19 +00002452 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002453 if (!u)
2454 return XML_STATUS_ERROR;
2455
2456 if (PyUnicode_GET_SIZE(u) != 256) {
2457 Py_DECREF(u);
Eli Benderskyb6717012013-08-04 06:09:49 -07002458 PyErr_SetString(PyExc_ValueError,
2459 "multi-byte encodings are not supported");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002460 return XML_STATUS_ERROR;
2461 }
2462
2463 p = PyUnicode_AS_UNICODE(u);
2464
2465 for (i = 0; i < 256; i++) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002466 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2467 info->map[i] = p[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002468 else
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002469 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002470 }
2471
2472 Py_DECREF(u);
2473
2474 return XML_STATUS_OK;
2475}
2476#endif
2477
2478/* -------------------------------------------------------------------- */
2479/* constructor and destructor */
2480
2481static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00002482xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002483{
2484 XMLParserObject* self;
2485 /* FIXME: does this need to be static? */
2486 static XML_Memory_Handling_Suite memory_handler;
2487
2488 PyObject* target = NULL;
2489 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002490 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002491 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2492 &target, &encoding))
2493 return NULL;
2494
2495#if defined(USE_PYEXPAT_CAPI)
2496 if (!expat_capi) {
2497 PyErr_SetString(
2498 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2499 );
2500 return NULL;
2501 }
2502#endif
2503
2504 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2505 if (self == NULL)
2506 return NULL;
2507
2508 self->entity = PyDict_New();
2509 if (!self->entity) {
2510 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002511 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002512 }
2513
2514 self->names = PyDict_New();
2515 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002516 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002517 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002518 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519 }
2520
2521 memory_handler.malloc_fcn = PyObject_Malloc;
2522 memory_handler.realloc_fcn = PyObject_Realloc;
2523 memory_handler.free_fcn = PyObject_Free;
2524
2525 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2526 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002527 PyObject_Del(self->names);
2528 PyObject_Del(self->entity);
2529 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002530 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002531 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002532 }
2533
2534 /* setup target handlers */
2535 if (!target) {
2536 target = treebuilder_new();
2537 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002538 EXPAT(ParserFree)(self->parser);
2539 PyObject_Del(self->names);
2540 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002541 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002542 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002543 }
2544 } else
2545 Py_INCREF(target);
2546 self->target = target;
2547
2548 self->handle_xml = PyObject_GetAttrString(target, "xml");
2549 self->handle_start = PyObject_GetAttrString(target, "start");
2550 self->handle_data = PyObject_GetAttrString(target, "data");
2551 self->handle_end = PyObject_GetAttrString(target, "end");
2552 self->handle_comment = PyObject_GetAttrString(target, "comment");
2553 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002554 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002555
2556 PyErr_Clear();
2557
2558 /* configure parser */
2559 EXPAT(SetUserData)(self->parser, self);
2560 EXPAT(SetElementHandler)(
2561 self->parser,
2562 (XML_StartElementHandler) expat_start_handler,
2563 (XML_EndElementHandler) expat_end_handler
2564 );
2565 EXPAT(SetDefaultHandlerExpand)(
2566 self->parser,
2567 (XML_DefaultHandler) expat_default_handler
2568 );
2569 EXPAT(SetCharacterDataHandler)(
2570 self->parser,
2571 (XML_CharacterDataHandler) expat_data_handler
2572 );
2573 if (self->handle_comment)
2574 EXPAT(SetCommentHandler)(
2575 self->parser,
2576 (XML_CommentHandler) expat_comment_handler
2577 );
2578 if (self->handle_pi)
2579 EXPAT(SetProcessingInstructionHandler)(
2580 self->parser,
2581 (XML_ProcessingInstructionHandler) expat_pi_handler
2582 );
2583#if defined(Py_USING_UNICODE)
2584 EXPAT(SetUnknownEncodingHandler)(
2585 self->parser,
2586 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2587 );
2588#endif
2589
2590 ALLOC(sizeof(XMLParserObject), "create expatparser");
2591
2592 return (PyObject*) self;
2593}
2594
2595static void
2596xmlparser_dealloc(XMLParserObject* self)
2597{
2598 EXPAT(ParserFree)(self->parser);
2599
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002600 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002601 Py_XDECREF(self->handle_pi);
2602 Py_XDECREF(self->handle_comment);
2603 Py_XDECREF(self->handle_end);
2604 Py_XDECREF(self->handle_data);
2605 Py_XDECREF(self->handle_start);
2606 Py_XDECREF(self->handle_xml);
2607
2608 Py_DECREF(self->target);
2609 Py_DECREF(self->entity);
2610 Py_DECREF(self->names);
2611
2612 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2613
2614 PyObject_Del(self);
2615}
2616
2617/* -------------------------------------------------------------------- */
2618/* methods (in alphabetical order) */
2619
2620LOCAL(PyObject*)
2621expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2622{
2623 int ok;
2624
2625 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2626
2627 if (PyErr_Occurred())
2628 return NULL;
2629
2630 if (!ok) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002631 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002632 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2633 EXPAT(GetErrorLineNumber)(self->parser),
2634 EXPAT(GetErrorColumnNumber)(self->parser)
2635 );
2636 return NULL;
2637 }
2638
2639 Py_RETURN_NONE;
2640}
2641
2642static PyObject*
2643xmlparser_close(XMLParserObject* self, PyObject* args)
2644{
2645 /* end feeding data to parser */
2646
2647 PyObject* res;
2648 if (!PyArg_ParseTuple(args, ":close"))
2649 return NULL;
2650
2651 res = expat_parse(self, "", 0, 1);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002652 if (!res)
2653 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002654
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002655 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002656 Py_DECREF(res);
2657 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002658 } if (self->handle_close) {
2659 Py_DECREF(res);
2660 return PyObject_CallFunction(self->handle_close, "");
2661 } else
2662 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663}
2664
2665static PyObject*
2666xmlparser_feed(XMLParserObject* self, PyObject* args)
2667{
2668 /* feed data to parser */
2669
2670 char* data;
2671 int data_len;
2672 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2673 return NULL;
2674
2675 return expat_parse(self, data, data_len, 0);
2676}
2677
2678static PyObject*
2679xmlparser_parse(XMLParserObject* self, PyObject* args)
2680{
2681 /* (internal) parse until end of input stream */
2682
2683 PyObject* reader;
2684 PyObject* buffer;
2685 PyObject* res;
2686
2687 PyObject* fileobj;
2688 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2689 return NULL;
2690
2691 reader = PyObject_GetAttrString(fileobj, "read");
2692 if (!reader)
2693 return NULL;
2694
2695 /* read from open file object */
2696 for (;;) {
2697
2698 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2699
2700 if (!buffer) {
2701 /* read failed (e.g. due to KeyboardInterrupt) */
2702 Py_DECREF(reader);
2703 return NULL;
2704 }
2705
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002706 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707 Py_DECREF(buffer);
2708 break;
2709 }
2710
2711 res = expat_parse(
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002712 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002713 );
2714
2715 Py_DECREF(buffer);
2716
2717 if (!res) {
2718 Py_DECREF(reader);
2719 return NULL;
2720 }
2721 Py_DECREF(res);
2722
2723 }
2724
2725 Py_DECREF(reader);
2726
2727 res = expat_parse(self, "", 0, 1);
2728
2729 if (res && TreeBuilder_CheckExact(self->target)) {
2730 Py_DECREF(res);
2731 return treebuilder_done((TreeBuilderObject*) self->target);
2732 }
2733
2734 return res;
2735}
2736
2737static PyObject*
2738xmlparser_setevents(XMLParserObject* self, PyObject* args)
2739{
2740 /* activate element event reporting */
2741
Neal Norwitzc7074382006-06-12 02:06:17 +00002742 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002743 TreeBuilderObject* target;
2744
2745 PyObject* events; /* event collector */
2746 PyObject* event_set = Py_None;
2747 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2748 &event_set))
2749 return NULL;
2750
2751 if (!TreeBuilder_CheckExact(self->target)) {
2752 PyErr_SetString(
2753 PyExc_TypeError,
2754 "event handling only supported for cElementTree.Treebuilder "
2755 "targets"
2756 );
2757 return NULL;
2758 }
2759
2760 target = (TreeBuilderObject*) self->target;
2761
2762 Py_INCREF(events);
2763 Py_XDECREF(target->events);
2764 target->events = events;
2765
2766 /* clear out existing events */
Serhiy Storchaka98a97222014-02-09 13:14:04 +02002767 Py_CLEAR(target->start_event_obj);
2768 Py_CLEAR(target->end_event_obj);
2769 Py_CLEAR(target->start_ns_event_obj);
2770 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002771
2772 if (event_set == Py_None) {
2773 /* default is "end" only */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002774 target->end_event_obj = PyString_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002775 Py_RETURN_NONE;
2776 }
2777
2778 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2779 goto error;
2780
2781 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2782 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2783 char* event;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002784 if (!PyString_Check(item))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002785 goto error;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002786 event = PyString_AS_STRING(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002787 if (strcmp(event, "start") == 0) {
2788 Py_INCREF(item);
2789 target->start_event_obj = item;
2790 } else if (strcmp(event, "end") == 0) {
2791 Py_INCREF(item);
2792 Py_XDECREF(target->end_event_obj);
2793 target->end_event_obj = item;
2794 } else if (strcmp(event, "start-ns") == 0) {
2795 Py_INCREF(item);
2796 Py_XDECREF(target->start_ns_event_obj);
2797 target->start_ns_event_obj = item;
2798 EXPAT(SetNamespaceDeclHandler)(
2799 self->parser,
2800 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2801 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2802 );
2803 } else if (strcmp(event, "end-ns") == 0) {
2804 Py_INCREF(item);
2805 Py_XDECREF(target->end_ns_event_obj);
2806 target->end_ns_event_obj = item;
2807 EXPAT(SetNamespaceDeclHandler)(
2808 self->parser,
2809 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2810 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2811 );
2812 } else {
2813 PyErr_Format(
2814 PyExc_ValueError,
2815 "unknown event '%s'", event
2816 );
2817 return NULL;
2818 }
2819 }
2820
2821 Py_RETURN_NONE;
2822
2823 error:
2824 PyErr_SetString(
2825 PyExc_TypeError,
2826 "invalid event tuple"
2827 );
2828 return NULL;
2829}
2830
2831static PyMethodDef xmlparser_methods[] = {
2832 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2833 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2834 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2835 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2836 {NULL, NULL}
2837};
2838
2839static PyObject*
2840xmlparser_getattr(XMLParserObject* self, char* name)
2841{
2842 PyObject* res;
2843
2844 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2845 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002846 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002847
2848 PyErr_Clear();
2849
2850 if (strcmp(name, "entity") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002851 res = self->entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002852 else if (strcmp(name, "target") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002853 res = self->target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002854 else if (strcmp(name, "version") == 0) {
2855 char buffer[100];
2856 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2857 XML_MINOR_VERSION, XML_MICRO_VERSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002858 return PyString_FromString(buffer);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002859 } else {
2860 PyErr_SetString(PyExc_AttributeError, name);
2861 return NULL;
2862 }
2863
2864 Py_INCREF(res);
2865 return res;
2866}
2867
2868statichere PyTypeObject XMLParser_Type = {
2869 PyObject_HEAD_INIT(NULL)
2870 0, "XMLParser", sizeof(XMLParserObject), 0,
2871 /* methods */
2872 (destructor)xmlparser_dealloc, /* tp_dealloc */
2873 0, /* tp_print */
2874 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2875};
2876
2877#endif
2878
2879/* ==================================================================== */
2880/* python module interface */
2881
2882static PyMethodDef _functions[] = {
2883 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2884 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2885 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2886#if defined(USE_EXPAT)
2887 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2888 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2889#endif
2890 {NULL, NULL}
2891};
2892
2893DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002894init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002895{
2896 PyObject* m;
2897 PyObject* g;
2898 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002899
2900 /* Patch object type */
Christian Heimese93237d2007-12-19 02:37:44 +00002901 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002902#if defined(USE_EXPAT)
Christian Heimese93237d2007-12-19 02:37:44 +00002903 Py_TYPE(&XMLParser_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002904#endif
2905
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002906 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002907 if (!m)
2908 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002909
2910 /* python glue code */
2911
2912 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002913 if (!g)
2914 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002915
2916 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2917
2918 bootstrap = (
2919
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002920 "from copy import copy, deepcopy\n"
2921
2922 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002923 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002924 "except ImportError:\n"
2925 " import ElementTree\n"
2926 "ET = ElementTree\n"
2927 "del ElementTree\n"
2928
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002929 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002930
2931 "try:\n" /* check if copy works as is */
2932 " copy(cElementTree.Element('x'))\n"
2933 "except:\n"
2934 " def copyelement(elem):\n"
2935 " return elem\n"
2936
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002937 "class CommentProxy:\n"
2938 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002939 " element = cElementTree.Element(ET.Comment)\n"
2940 " element.text = text\n"
2941 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002942 " def __cmp__(self, other):\n"
2943 " return cmp(ET.Comment, other)\n"
2944 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002945
2946 "class ElementTree(ET.ElementTree):\n" /* public */
2947 " def parse(self, source, parser=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002948 " close_source = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002949 " if not hasattr(source, 'read'):\n"
2950 " source = open(source, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002951 " close_source = False\n"
2952 " try:\n"
2953 " if parser is not None:\n"
2954 " while 1:\n"
2955 " data = source.read(65536)\n"
2956 " if not data:\n"
2957 " break\n"
2958 " parser.feed(data)\n"
2959 " self._root = parser.close()\n"
2960 " else:\n"
2961 " parser = cElementTree.XMLParser()\n"
2962 " self._root = parser._parse(source)\n"
2963 " return self._root\n"
2964 " finally:\n"
2965 " if close_source:\n"
2966 " source.close()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002967 "cElementTree.ElementTree = ElementTree\n"
2968
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002969 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970 " if tag == '*':\n"
2971 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002972 " if tag is None or node.tag == tag:\n"
2973 " yield node\n"
2974 " for node in node:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002975 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976 " yield node\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002977
2978 "def itertext(node):\n" /* helper */
2979 " if node.text:\n"
2980 " yield node.text\n"
2981 " for e in node:\n"
2982 " for s in e.itertext():\n"
2983 " yield s\n"
2984 " if e.tail:\n"
2985 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002986
2987 "def parse(source, parser=None):\n" /* public */
2988 " tree = ElementTree()\n"
2989 " tree.parse(source, parser)\n"
2990 " return tree\n"
2991 "cElementTree.parse = parse\n"
2992
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002993 "class iterparse(object):\n"
2994 " root = None\n"
2995 " def __init__(self, file, events=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002996 " self._close_file = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002997 " if not hasattr(file, 'read'):\n"
2998 " file = open(file, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002999 " self._close_file = True\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003000 " self._file = file\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003001 " self._events = []\n"
3002 " self._index = 0\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003003 " self._error = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003004 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003005 " b = cElementTree.TreeBuilder()\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003006 " self._parser = cElementTree.XMLParser(b)\n"
3007 " self._parser._setevents(self._events, events)\n"
3008 " def next(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003009 " while 1:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003010 " try:\n"
3011 " item = self._events[self._index]\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003012 " self._index += 1\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003013 " return item\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003014 " except IndexError:\n"
3015 " pass\n"
3016 " if self._error:\n"
3017 " e = self._error\n"
3018 " self._error = None\n"
3019 " raise e\n"
3020 " if self._parser is None:\n"
3021 " self.root = self._root\n"
3022 " if self._close_file:\n"
3023 " self._file.close()\n"
3024 " raise StopIteration\n"
3025 " # load event buffer\n"
3026 " del self._events[:]\n"
3027 " self._index = 0\n"
3028 " data = self._file.read(16384)\n"
3029 " if data:\n"
3030 " try:\n"
3031 " self._parser.feed(data)\n"
3032 " except SyntaxError as exc:\n"
3033 " self._error = exc\n"
3034 " else:\n"
3035 " self._root = self._parser.close()\n"
3036 " self._parser = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003037 " def __iter__(self):\n"
3038 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003040
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003041 "class PIProxy:\n"
3042 " def __call__(self, target, text=None):\n"
3043 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003044 " element.text = target\n"
3045 " if text:\n"
3046 " element.text = element.text + ' ' + text\n"
3047 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003048 " def __cmp__(self, other):\n"
3049 " return cmp(ET.PI, other)\n"
3050 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003051
3052 "def XML(text):\n" /* public */
3053 " parser = cElementTree.XMLParser()\n"
3054 " parser.feed(text)\n"
3055 " return parser.close()\n"
3056 "cElementTree.XML = cElementTree.fromstring = XML\n"
3057
3058 "def XMLID(text):\n" /* public */
3059 " tree = XML(text)\n"
3060 " ids = {}\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003061 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003062 " id = elem.get('id')\n"
3063 " if id:\n"
3064 " ids[id] = elem\n"
3065 " return tree, ids\n"
3066 "cElementTree.XMLID = XMLID\n"
3067
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003068 "try:\n"
3069 " register_namespace = ET.register_namespace\n"
3070 "except AttributeError:\n"
3071 " def register_namespace(prefix, uri):\n"
3072 " ET._namespace_map[uri] = prefix\n"
3073 "cElementTree.register_namespace = register_namespace\n"
3074
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003075 "cElementTree.dump = ET.dump\n"
3076 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3077 "cElementTree.iselement = ET.iselement\n"
3078 "cElementTree.QName = ET.QName\n"
3079 "cElementTree.tostring = ET.tostring\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003080 "cElementTree.fromstringlist = ET.fromstringlist\n"
3081 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082 "cElementTree.VERSION = '" VERSION "'\n"
3083 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003084
3085 );
3086
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003087 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3088 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003089
3090 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3091
3092 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3093 if (elementtree_copyelement_obj) {
3094 /* reduce hack needed; enable reduce method */
3095 PyMethodDef* mp;
3096 for (mp = element_methods; mp->ml_name; mp++)
3097 if (mp->ml_meth == (PyCFunction) element_reduce) {
3098 mp->ml_name = "__reduce__";
3099 break;
3100 }
3101 } else
3102 PyErr_Clear();
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003103
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003104 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003105 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3106 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003107
3108#if defined(USE_PYEXPAT_CAPI)
3109 /* link against pyexpat, if possible */
Larry Hastings402b73f2010-03-25 00:54:54 +00003110 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003111 if (expat_capi) {
3112 /* check that it's usable */
3113 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3114 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3115 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3116 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3117 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3118 expat_capi = NULL;
3119 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003120#endif
3121
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003122 elementtree_parseerror_obj = PyErr_NewException(
3123 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3124 );
3125 Py_INCREF(elementtree_parseerror_obj);
3126 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003127}