blob: a035c5792026deb7091b71429aff61564e073460 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xicluna3e8c1892010-03-11 14:36:19 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xicluna3e8c1892010-03-11 14:36:19 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xicluna3e8c1892010-03-11 14:36:19 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xicluna3e8c1892010-03-11 14:36:19 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
Fredrik Lundhdc075b92006-08-16 16:47:07 +000052#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000053
54/* -------------------------------------------------------------------- */
55/* configuration */
56
57/* Leave defined to include the expat-based XMLParser type */
58#define USE_EXPAT
59
Florent Xicluna3e8c1892010-03-11 14:36:19 +000060/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* #define USE_PYEXPAT_CAPI */
62
63/* An element can hold this many children without extra memory
64 allocations. */
65#define STATIC_CHILDREN 4
66
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
73 eight bytes. For the current version of cElementTree, this means
74 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
79#if 0
80static int memory = 0;
81#define ALLOC(size, comment)\
82do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
83#define RELEASE(size, comment)\
84do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
85#else
86#define ALLOC(size, comment)
87#define RELEASE(size, comment)
88#endif
89
90/* compiler tweaks */
91#if defined(_MSC_VER)
92#define LOCAL(type) static __inline type __fastcall
93#else
94#define LOCAL(type) static type
95#endif
96
97/* compatibility macros */
Florent Xicluna3e8c1892010-03-11 14:36:19 +000098#if (PY_VERSION_HEX < 0x02060000)
99#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
100#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
101#endif
102
Martin v. Löwis18e16552006-02-15 17:27:45 +0000103#if (PY_VERSION_HEX < 0x02050000)
104typedef int Py_ssize_t;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000105#define lenfunc inquiry
Martin v. Löwis18e16552006-02-15 17:27:45 +0000106#endif
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000107
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000108#if (PY_VERSION_HEX < 0x02040000)
109#define PyDict_CheckExact PyDict_Check
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000110
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000111#if !defined(Py_RETURN_NONE)
112#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
113#endif
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000114#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000115
116/* macros used to store 'join' flags in string object pointers. note
117 that all use of text and tail as object pointers must be wrapped in
118 JOIN_OBJ. see comments in the ElementObject definition for more
119 info. */
120#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
121#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
122#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
123
124/* glue functions (see the init function for details) */
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000125static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000128static PyObject* elementtree_iter_obj;
129static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130static PyObject* elementpath_obj;
131
132/* helpers */
133
134LOCAL(PyObject*)
135deepcopy(PyObject* object, PyObject* memo)
136{
137 /* do a deep copy of the given object */
138
139 PyObject* args;
140 PyObject* result;
141
142 if (!elementtree_deepcopy_obj) {
143 PyErr_SetString(
144 PyExc_RuntimeError,
145 "deepcopy helper not found"
146 );
147 return NULL;
148 }
149
150 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000151 if (!args)
152 return NULL;
153
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
155 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156
157 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158
159 Py_DECREF(args);
160
161 return result;
162}
163
164LOCAL(PyObject*)
165list_join(PyObject* list)
166{
167 /* join list elements (destroying the list in the process) */
168
169 PyObject* joiner;
170 PyObject* function;
171 PyObject* args;
172 PyObject* result;
173
174 switch (PyList_GET_SIZE(list)) {
175 case 0:
176 Py_DECREF(list);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000177 return PyString_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000178 case 1:
179 result = PyList_GET_ITEM(list, 0);
180 Py_INCREF(result);
181 Py_DECREF(list);
182 return result;
183 }
184
185 /* two or more elements: slice out a suitable separator from the
186 first member, and use that to join the entire list */
187
188 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
189 if (!joiner)
190 return NULL;
191
192 function = PyObject_GetAttrString(joiner, "join");
193 if (!function) {
194 Py_DECREF(joiner);
195 return NULL;
196 }
197
198 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000199 if (!args)
200 return NULL;
201
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202 PyTuple_SET_ITEM(args, 0, list);
203
204 result = PyObject_CallObject(function, args);
205
206 Py_DECREF(args); /* also removes list */
207 Py_DECREF(function);
208 Py_DECREF(joiner);
209
210 return result;
211}
212
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000213/* -------------------------------------------------------------------- */
214/* the element type */
215
216typedef struct {
217
218 /* attributes (a dictionary object), or None if no attributes */
219 PyObject* attrib;
220
221 /* child elements */
222 int length; /* actual number of items */
223 int allocated; /* allocated items */
224
225 /* this either points to _children or to a malloced buffer */
226 PyObject* *children;
227
228 PyObject* _children[STATIC_CHILDREN];
229
230} ElementObjectExtra;
231
232typedef struct {
233 PyObject_HEAD
234
235 /* element tag (a string). */
236 PyObject* tag;
237
238 /* text before first child. note that this is a tagged pointer;
239 use JOIN_OBJ to get the object pointer. the join flag is used
240 to distinguish lists created by the tree builder from lists
241 assigned to the attribute by application code; the former
242 should be joined before being returned to the user, the latter
243 should be left intact. */
244 PyObject* text;
245
246 /* text after this element, in parent. note that this is a tagged
247 pointer; use JOIN_OBJ to get the object pointer. */
248 PyObject* tail;
249
250 ElementObjectExtra* extra;
251
252} ElementObject;
253
254staticforward PyTypeObject Element_Type;
255
Christian Heimese93237d2007-12-19 02:37:44 +0000256#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000257
258/* -------------------------------------------------------------------- */
259/* element constructor and destructor */
260
261LOCAL(int)
262element_new_extra(ElementObject* self, PyObject* attrib)
263{
264 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
265 if (!self->extra)
266 return -1;
267
268 if (!attrib)
269 attrib = Py_None;
270
271 Py_INCREF(attrib);
272 self->extra->attrib = attrib;
273
274 self->extra->length = 0;
275 self->extra->allocated = STATIC_CHILDREN;
276 self->extra->children = self->extra->_children;
277
278 return 0;
279}
280
281LOCAL(void)
282element_dealloc_extra(ElementObject* self)
283{
284 int i;
285
286 Py_DECREF(self->extra->attrib);
287
288 for (i = 0; i < self->extra->length; i++)
289 Py_DECREF(self->extra->children[i]);
290
291 if (self->extra->children != self->extra->_children)
292 PyObject_Free(self->extra->children);
293
294 PyObject_Free(self->extra);
295}
296
297LOCAL(PyObject*)
298element_new(PyObject* tag, PyObject* attrib)
299{
300 ElementObject* self;
301
302 self = PyObject_New(ElementObject, &Element_Type);
303 if (self == NULL)
304 return NULL;
305
306 /* use None for empty dictionaries */
307 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
308 attrib = Py_None;
309
310 self->extra = NULL;
311
312 if (attrib != Py_None) {
313
Neal Norwitzc6a989a2006-05-10 06:57:58 +0000314 if (element_new_extra(self, attrib) < 0) {
315 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000316 return NULL;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000317 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000318
319 self->extra->length = 0;
320 self->extra->allocated = STATIC_CHILDREN;
321 self->extra->children = self->extra->_children;
322
323 }
324
325 Py_INCREF(tag);
326 self->tag = tag;
327
328 Py_INCREF(Py_None);
329 self->text = Py_None;
330
331 Py_INCREF(Py_None);
332 self->tail = Py_None;
333
334 ALLOC(sizeof(ElementObject), "create element");
335
336 return (PyObject*) self;
337}
338
339LOCAL(int)
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200340element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000341{
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200342 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000343 PyObject* *children;
344
345 /* make sure self->children can hold the given number of extra
346 elements. set an exception and return -1 if allocation failed */
347
348 if (!self->extra)
349 element_new_extra(self, NULL);
350
351 size = self->extra->length + extra;
352
353 if (size > self->extra->allocated) {
354 /* use Python 2.4's list growth strategy */
355 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000356 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
357 * which needs at least 4 bytes.
358 * Although it's a false alarm always assume at least one child to
359 * be safe.
360 */
361 size = size ? size : 1;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200362 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
363 goto nomemory;
364 if (size > INT_MAX) {
365 PyErr_SetString(PyExc_OverflowError,
366 "too many children");
367 return -1;
368 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000369 if (self->extra->children != self->extra->_children) {
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000370 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
371 * "children", which needs at least 4 bytes. Although it's a
372 * false alarm always assume at least one child to be safe.
373 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000374 children = PyObject_Realloc(self->extra->children,
375 size * sizeof(PyObject*));
376 if (!children)
377 goto nomemory;
378 } else {
379 children = PyObject_Malloc(size * sizeof(PyObject*));
380 if (!children)
381 goto nomemory;
382 /* copy existing children from static area to malloc buffer */
383 memcpy(children, self->extra->children,
384 self->extra->length * sizeof(PyObject*));
385 }
386 self->extra->children = children;
387 self->extra->allocated = size;
388 }
389
390 return 0;
391
392 nomemory:
393 PyErr_NoMemory();
394 return -1;
395}
396
397LOCAL(int)
398element_add_subelement(ElementObject* self, PyObject* element)
399{
400 /* add a child element to a parent */
401
402 if (element_resize(self, 1) < 0)
403 return -1;
404
405 Py_INCREF(element);
406 self->extra->children[self->extra->length] = element;
407
408 self->extra->length++;
409
410 return 0;
411}
412
413LOCAL(PyObject*)
414element_get_attrib(ElementObject* self)
415{
416 /* return borrowed reference to attrib dictionary */
417 /* note: this function assumes that the extra section exists */
418
419 PyObject* res = self->extra->attrib;
420
421 if (res == Py_None) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000422 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000423 /* create missing dictionary */
424 res = PyDict_New();
425 if (!res)
426 return NULL;
427 self->extra->attrib = res;
428 }
429
430 return res;
431}
432
433LOCAL(PyObject*)
434element_get_text(ElementObject* self)
435{
436 /* return borrowed reference to text attribute */
437
438 PyObject* res = self->text;
439
440 if (JOIN_GET(res)) {
441 res = JOIN_OBJ(res);
442 if (PyList_CheckExact(res)) {
443 res = list_join(res);
444 if (!res)
445 return NULL;
446 self->text = res;
447 }
448 }
449
450 return res;
451}
452
453LOCAL(PyObject*)
454element_get_tail(ElementObject* self)
455{
456 /* return borrowed reference to text attribute */
457
458 PyObject* res = self->tail;
459
460 if (JOIN_GET(res)) {
461 res = JOIN_OBJ(res);
462 if (PyList_CheckExact(res)) {
463 res = list_join(res);
464 if (!res)
465 return NULL;
466 self->tail = res;
467 }
468 }
469
470 return res;
471}
472
473static PyObject*
474element(PyObject* self, PyObject* args, PyObject* kw)
475{
476 PyObject* elem;
477
478 PyObject* tag;
479 PyObject* attrib = NULL;
480 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
481 &PyDict_Type, &attrib))
482 return NULL;
483
484 if (attrib || kw) {
485 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
486 if (!attrib)
487 return NULL;
488 if (kw)
489 PyDict_Update(attrib, kw);
490 } else {
491 Py_INCREF(Py_None);
492 attrib = Py_None;
493 }
494
495 elem = element_new(tag, attrib);
496
497 Py_DECREF(attrib);
498
499 return elem;
500}
501
502static PyObject*
503subelement(PyObject* self, PyObject* args, PyObject* kw)
504{
505 PyObject* elem;
506
507 ElementObject* parent;
508 PyObject* tag;
509 PyObject* attrib = NULL;
510 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
511 &Element_Type, &parent, &tag,
512 &PyDict_Type, &attrib))
513 return NULL;
514
515 if (attrib || kw) {
516 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
517 if (!attrib)
518 return NULL;
519 if (kw)
520 PyDict_Update(attrib, kw);
521 } else {
522 Py_INCREF(Py_None);
523 attrib = Py_None;
524 }
525
526 elem = element_new(tag, attrib);
527
528 Py_DECREF(attrib);
529
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000530 if (element_add_subelement(parent, elem) < 0) {
531 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000532 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000533 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000534
535 return elem;
536}
537
538static void
539element_dealloc(ElementObject* self)
540{
541 if (self->extra)
542 element_dealloc_extra(self);
543
544 /* discard attributes */
545 Py_DECREF(self->tag);
546 Py_DECREF(JOIN_OBJ(self->text));
547 Py_DECREF(JOIN_OBJ(self->tail));
548
549 RELEASE(sizeof(ElementObject), "destroy element");
550
551 PyObject_Del(self);
552}
553
554/* -------------------------------------------------------------------- */
555/* methods (in alphabetical order) */
556
557static PyObject*
558element_append(ElementObject* self, PyObject* args)
559{
560 PyObject* element;
561 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
562 return NULL;
563
564 if (element_add_subelement(self, element) < 0)
565 return NULL;
566
567 Py_RETURN_NONE;
568}
569
570static PyObject*
571element_clear(ElementObject* self, PyObject* args)
572{
573 if (!PyArg_ParseTuple(args, ":clear"))
574 return NULL;
575
576 if (self->extra) {
577 element_dealloc_extra(self);
578 self->extra = NULL;
579 }
580
581 Py_INCREF(Py_None);
582 Py_DECREF(JOIN_OBJ(self->text));
583 self->text = Py_None;
584
585 Py_INCREF(Py_None);
586 Py_DECREF(JOIN_OBJ(self->tail));
587 self->tail = Py_None;
588
589 Py_RETURN_NONE;
590}
591
592static PyObject*
593element_copy(ElementObject* self, PyObject* args)
594{
595 int i;
596 ElementObject* element;
597
598 if (!PyArg_ParseTuple(args, ":__copy__"))
599 return NULL;
600
601 element = (ElementObject*) element_new(
602 self->tag, (self->extra) ? self->extra->attrib : Py_None
603 );
604 if (!element)
605 return NULL;
606
607 Py_DECREF(JOIN_OBJ(element->text));
608 element->text = self->text;
609 Py_INCREF(JOIN_OBJ(element->text));
610
611 Py_DECREF(JOIN_OBJ(element->tail));
612 element->tail = self->tail;
613 Py_INCREF(JOIN_OBJ(element->tail));
614
615 if (self->extra) {
616
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000617 if (element_resize(element, self->extra->length) < 0) {
618 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000619 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000620 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000621
622 for (i = 0; i < self->extra->length; i++) {
623 Py_INCREF(self->extra->children[i]);
624 element->extra->children[i] = self->extra->children[i];
625 }
626
627 element->extra->length = self->extra->length;
628
629 }
630
631 return (PyObject*) element;
632}
633
634static PyObject*
635element_deepcopy(ElementObject* self, PyObject* args)
636{
637 int i;
638 ElementObject* element;
639 PyObject* tag;
640 PyObject* attrib;
641 PyObject* text;
642 PyObject* tail;
643 PyObject* id;
644
645 PyObject* memo;
646 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
647 return NULL;
648
649 tag = deepcopy(self->tag, memo);
650 if (!tag)
651 return NULL;
652
653 if (self->extra) {
654 attrib = deepcopy(self->extra->attrib, memo);
655 if (!attrib) {
656 Py_DECREF(tag);
657 return NULL;
658 }
659 } else {
660 Py_INCREF(Py_None);
661 attrib = Py_None;
662 }
663
664 element = (ElementObject*) element_new(tag, attrib);
665
666 Py_DECREF(tag);
667 Py_DECREF(attrib);
668
669 if (!element)
670 return NULL;
671
672 text = deepcopy(JOIN_OBJ(self->text), memo);
673 if (!text)
674 goto error;
675 Py_DECREF(element->text);
676 element->text = JOIN_SET(text, JOIN_GET(self->text));
677
678 tail = deepcopy(JOIN_OBJ(self->tail), memo);
679 if (!tail)
680 goto error;
681 Py_DECREF(element->tail);
682 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
683
684 if (self->extra) {
685
686 if (element_resize(element, self->extra->length) < 0)
687 goto error;
688
689 for (i = 0; i < self->extra->length; i++) {
690 PyObject* child = deepcopy(self->extra->children[i], memo);
691 if (!child) {
692 element->extra->length = i;
693 goto error;
694 }
695 element->extra->children[i] = child;
696 }
697
698 element->extra->length = self->extra->length;
699
700 }
701
702 /* add object to memo dictionary (so deepcopy won't visit it again) */
703 id = PyInt_FromLong((Py_uintptr_t) self);
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000704 if (!id)
705 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000706
707 i = PyDict_SetItem(memo, id, (PyObject*) element);
708
709 Py_DECREF(id);
710
711 if (i < 0)
712 goto error;
713
714 return (PyObject*) element;
715
716 error:
717 Py_DECREF(element);
718 return NULL;
719}
720
721LOCAL(int)
722checkpath(PyObject* tag)
723{
Neal Norwitzc7074382006-06-12 02:06:17 +0000724 Py_ssize_t i;
725 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000726
727 /* check if a tag contains an xpath character */
728
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000729#define PATHCHAR(ch) \
730 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000731
732#if defined(Py_USING_UNICODE)
733 if (PyUnicode_Check(tag)) {
734 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
735 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
736 if (p[i] == '{')
737 check = 0;
738 else if (p[i] == '}')
739 check = 1;
740 else if (check && PATHCHAR(p[i]))
741 return 1;
742 }
743 return 0;
744 }
745#endif
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000746 if (PyString_Check(tag)) {
747 char *p = PyString_AS_STRING(tag);
748 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000749 if (p[i] == '{')
750 check = 0;
751 else if (p[i] == '}')
752 check = 1;
753 else if (check && PATHCHAR(p[i]))
754 return 1;
755 }
756 return 0;
757 }
758
759 return 1; /* unknown type; might be path expression */
760}
761
762static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000763element_extend(ElementObject* self, PyObject* args)
764{
765 PyObject* seq;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300766 Py_ssize_t i;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000767
768 PyObject* seq_in;
769 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
770 return NULL;
771
772 seq = PySequence_Fast(seq_in, "");
773 if (!seq) {
774 PyErr_Format(
775 PyExc_TypeError,
776 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
777 );
778 return NULL;
779 }
780
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300781 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000782 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
783 if (element_add_subelement(self, element) < 0) {
784 Py_DECREF(seq);
785 return NULL;
786 }
787 }
788
789 Py_DECREF(seq);
790
791 Py_RETURN_NONE;
792}
793
794static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000795element_find(ElementObject* self, PyObject* args)
796{
797 int i;
798
799 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000800 PyObject* namespaces = Py_None;
801 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000802 return NULL;
803
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000804 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000806 elementpath_obj, "find", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807 );
808
809 if (!self->extra)
810 Py_RETURN_NONE;
811
812 for (i = 0; i < self->extra->length; i++) {
813 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300814 int rc;
815 if (!Element_CheckExact(item))
816 continue;
817 Py_INCREF(item);
818 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
819 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000820 return item;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300821 Py_DECREF(item);
822 if (rc < 0 && PyErr_Occurred())
823 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000824 }
825
826 Py_RETURN_NONE;
827}
828
829static PyObject*
830element_findtext(ElementObject* self, PyObject* args)
831{
832 int i;
833
834 PyObject* tag;
835 PyObject* default_value = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000836 PyObject* namespaces = Py_None;
837 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000838 return NULL;
839
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000840 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000841 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000842 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000843 );
844
845 if (!self->extra) {
846 Py_INCREF(default_value);
847 return default_value;
848 }
849
850 for (i = 0; i < self->extra->length; i++) {
851 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300852 int rc;
853 if (!Element_CheckExact(item))
854 continue;
855 Py_INCREF(item);
856 rc = PyObject_Compare(item->tag, tag);
857 if (rc == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000858 PyObject* text = element_get_text(item);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300859 if (text == Py_None) {
860 Py_DECREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000861 return PyString_FromString("");
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300862 }
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000863 Py_XINCREF(text);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300864 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000865 return text;
866 }
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300867 Py_DECREF(item);
868 if (rc < 0 && PyErr_Occurred())
869 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000870 }
871
872 Py_INCREF(default_value);
873 return default_value;
874}
875
876static PyObject*
877element_findall(ElementObject* self, PyObject* args)
878{
879 int i;
880 PyObject* out;
881
882 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000883 PyObject* namespaces = Py_None;
884 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000885 return NULL;
886
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000887 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000888 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000889 elementpath_obj, "findall", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000890 );
891
892 out = PyList_New(0);
893 if (!out)
894 return NULL;
895
896 if (!self->extra)
897 return out;
898
899 for (i = 0; i < self->extra->length; i++) {
900 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300901 int rc;
902 if (!Element_CheckExact(item))
903 continue;
904 Py_INCREF(item);
905 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
906 if (rc == 0)
907 rc = PyList_Append(out, item);
908 Py_DECREF(item);
909 if (rc < 0 && PyErr_Occurred()) {
910 Py_DECREF(out);
911 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000912 }
913 }
914
915 return out;
916}
917
918static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000919element_iterfind(ElementObject* self, PyObject* args)
920{
921 PyObject* tag;
922 PyObject* namespaces = Py_None;
923 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
924 return NULL;
925
926 return PyObject_CallMethod(
927 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
928 );
929}
930
931static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000932element_get(ElementObject* self, PyObject* args)
933{
934 PyObject* value;
935
936 PyObject* key;
937 PyObject* default_value = Py_None;
938 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
939 return NULL;
940
941 if (!self->extra || self->extra->attrib == Py_None)
942 value = default_value;
943 else {
944 value = PyDict_GetItem(self->extra->attrib, key);
945 if (!value)
946 value = default_value;
947 }
948
949 Py_INCREF(value);
950 return value;
951}
952
953static PyObject*
954element_getchildren(ElementObject* self, PyObject* args)
955{
956 int i;
957 PyObject* list;
958
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000959 /* FIXME: report as deprecated? */
960
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000961 if (!PyArg_ParseTuple(args, ":getchildren"))
962 return NULL;
963
964 if (!self->extra)
965 return PyList_New(0);
966
967 list = PyList_New(self->extra->length);
968 if (!list)
969 return NULL;
970
971 for (i = 0; i < self->extra->length; i++) {
972 PyObject* item = self->extra->children[i];
973 Py_INCREF(item);
974 PyList_SET_ITEM(list, i, item);
975 }
976
977 return list;
978}
979
980static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000981element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000982{
983 PyObject* result;
984
985 PyObject* tag = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000986 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000987 return NULL;
988
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000989 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000990 PyErr_SetString(
991 PyExc_RuntimeError,
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000992 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000993 );
994 return NULL;
995 }
996
997 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000998 if (!args)
999 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001000
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001001 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1002 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1003
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001004 result = PyObject_CallObject(elementtree_iter_obj, args);
1005
1006 Py_DECREF(args);
1007
1008 return result;
1009}
1010
1011
1012static PyObject*
1013element_itertext(ElementObject* self, PyObject* args)
1014{
1015 PyObject* result;
1016
1017 if (!PyArg_ParseTuple(args, ":itertext"))
1018 return NULL;
1019
1020 if (!elementtree_itertext_obj) {
1021 PyErr_SetString(
1022 PyExc_RuntimeError,
1023 "itertext helper not found"
1024 );
1025 return NULL;
1026 }
1027
1028 args = PyTuple_New(1);
1029 if (!args)
1030 return NULL;
1031
1032 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1033
1034 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001035
1036 Py_DECREF(args);
1037
1038 return result;
1039}
1040
1041static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001042element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001043{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001044 ElementObject* self = (ElementObject*) self_;
1045
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001046 if (!self->extra || index < 0 || index >= self->extra->length) {
1047 PyErr_SetString(
1048 PyExc_IndexError,
1049 "child index out of range"
1050 );
1051 return NULL;
1052 }
1053
1054 Py_INCREF(self->extra->children[index]);
1055 return self->extra->children[index];
1056}
1057
1058static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001059element_insert(ElementObject* self, PyObject* args)
1060{
1061 int i;
1062
1063 int index;
1064 PyObject* element;
1065 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1066 &Element_Type, &element))
1067 return NULL;
1068
1069 if (!self->extra)
1070 element_new_extra(self, NULL);
1071
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001072 if (index < 0) {
1073 index += self->extra->length;
1074 if (index < 0)
1075 index = 0;
1076 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001077 if (index > self->extra->length)
1078 index = self->extra->length;
1079
1080 if (element_resize(self, 1) < 0)
1081 return NULL;
1082
1083 for (i = self->extra->length; i > index; i--)
1084 self->extra->children[i] = self->extra->children[i-1];
1085
1086 Py_INCREF(element);
1087 self->extra->children[index] = element;
1088
1089 self->extra->length++;
1090
1091 Py_RETURN_NONE;
1092}
1093
1094static PyObject*
1095element_items(ElementObject* self, PyObject* args)
1096{
1097 if (!PyArg_ParseTuple(args, ":items"))
1098 return NULL;
1099
1100 if (!self->extra || self->extra->attrib == Py_None)
1101 return PyList_New(0);
1102
1103 return PyDict_Items(self->extra->attrib);
1104}
1105
1106static PyObject*
1107element_keys(ElementObject* self, PyObject* args)
1108{
1109 if (!PyArg_ParseTuple(args, ":keys"))
1110 return NULL;
1111
1112 if (!self->extra || self->extra->attrib == Py_None)
1113 return PyList_New(0);
1114
1115 return PyDict_Keys(self->extra->attrib);
1116}
1117
Martin v. Löwis18e16552006-02-15 17:27:45 +00001118static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001119element_length(ElementObject* self)
1120{
1121 if (!self->extra)
1122 return 0;
1123
1124 return self->extra->length;
1125}
1126
1127static PyObject*
1128element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1129{
1130 PyObject* elem;
1131
1132 PyObject* tag;
1133 PyObject* attrib;
1134 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1135 return NULL;
1136
1137 attrib = PyDict_Copy(attrib);
1138 if (!attrib)
1139 return NULL;
1140
1141 elem = element_new(tag, attrib);
1142
1143 Py_DECREF(attrib);
1144
1145 return elem;
1146}
1147
1148static PyObject*
1149element_reduce(ElementObject* self, PyObject* args)
1150{
1151 if (!PyArg_ParseTuple(args, ":__reduce__"))
1152 return NULL;
1153
1154 /* Hack alert: This method is used to work around a __copy__
1155 problem on certain 2.3 and 2.4 versions. To save time and
1156 simplify the code, we create the copy in here, and use a dummy
1157 copyelement helper to trick the copy module into doing the
1158 right thing. */
1159
1160 if (!elementtree_copyelement_obj) {
1161 PyErr_SetString(
1162 PyExc_RuntimeError,
1163 "copyelement helper not found"
1164 );
1165 return NULL;
1166 }
1167
1168 return Py_BuildValue(
1169 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1170 );
1171}
1172
1173static PyObject*
1174element_remove(ElementObject* self, PyObject* args)
1175{
1176 int i;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001177 int rc;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178 PyObject* element;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001179 PyObject* found;
1180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001181 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1182 return NULL;
1183
1184 if (!self->extra) {
1185 /* element has no children, so raise exception */
1186 PyErr_SetString(
1187 PyExc_ValueError,
1188 "list.remove(x): x not in list"
1189 );
1190 return NULL;
1191 }
1192
1193 for (i = 0; i < self->extra->length; i++) {
1194 if (self->extra->children[i] == element)
1195 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001196 rc = PyObject_Compare(self->extra->children[i], element);
1197 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001198 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001199 if (rc < 0 && PyErr_Occurred())
1200 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001201 }
1202
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001203 if (i >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001204 /* element is not in children, so raise exception */
1205 PyErr_SetString(
1206 PyExc_ValueError,
1207 "list.remove(x): x not in list"
1208 );
1209 return NULL;
1210 }
1211
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001212 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001213
1214 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001215 for (; i < self->extra->length; i++)
1216 self->extra->children[i] = self->extra->children[i+1];
1217
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001218 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219 Py_RETURN_NONE;
1220}
1221
1222static PyObject*
1223element_repr(ElementObject* self)
1224{
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001225 PyObject *repr, *tag;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001227 tag = PyObject_Repr(self->tag);
1228 if (!tag)
1229 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001230
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001231 repr = PyString_FromFormat("<Element %s at %p>",
1232 PyString_AS_STRING(tag), self);
1233
1234 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001235
1236 return repr;
1237}
1238
1239static PyObject*
1240element_set(ElementObject* self, PyObject* args)
1241{
1242 PyObject* attrib;
1243
1244 PyObject* key;
1245 PyObject* value;
1246 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1247 return NULL;
1248
1249 if (!self->extra)
1250 element_new_extra(self, NULL);
1251
1252 attrib = element_get_attrib(self);
1253 if (!attrib)
1254 return NULL;
1255
1256 if (PyDict_SetItem(attrib, key, value) < 0)
1257 return NULL;
1258
1259 Py_RETURN_NONE;
1260}
1261
1262static int
Serhiy Storchakab5b76c32015-11-26 11:21:47 +02001263element_setitem(PyObject* self_, Py_ssize_t index_, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001264{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001265 ElementObject* self = (ElementObject*) self_;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001266 int i, index;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001267 PyObject* old;
1268
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001269 if (!self->extra || index_ < 0 || index_ >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001270 PyErr_SetString(
1271 PyExc_IndexError,
1272 "child assignment index out of range");
1273 return -1;
1274 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001275 index = (int)index_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001276
1277 old = self->extra->children[index];
1278
1279 if (item) {
1280 Py_INCREF(item);
1281 self->extra->children[index] = item;
1282 } else {
1283 self->extra->length--;
1284 for (i = index; i < self->extra->length; i++)
1285 self->extra->children[i] = self->extra->children[i+1];
1286 }
1287
1288 Py_DECREF(old);
1289
1290 return 0;
1291}
1292
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001293static PyObject*
1294element_subscr(PyObject* self_, PyObject* item)
1295{
1296 ElementObject* self = (ElementObject*) self_;
1297
1298#if (PY_VERSION_HEX < 0x02050000)
1299 if (PyInt_Check(item) || PyLong_Check(item)) {
1300 long i = PyInt_AsLong(item);
1301#else
1302 if (PyIndex_Check(item)) {
1303 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1304#endif
1305
1306 if (i == -1 && PyErr_Occurred()) {
1307 return NULL;
1308 }
1309 if (i < 0 && self->extra)
1310 i += self->extra->length;
1311 return element_getitem(self_, i);
1312 }
1313 else if (PySlice_Check(item)) {
1314 Py_ssize_t start, stop, step, slicelen, cur, i;
1315 PyObject* list;
1316
1317 if (!self->extra)
1318 return PyList_New(0);
1319
1320 if (PySlice_GetIndicesEx((PySliceObject *)item,
1321 self->extra->length,
1322 &start, &stop, &step, &slicelen) < 0) {
1323 return NULL;
1324 }
1325
1326 if (slicelen <= 0)
1327 return PyList_New(0);
1328 else {
1329 list = PyList_New(slicelen);
1330 if (!list)
1331 return NULL;
1332
1333 for (cur = start, i = 0; i < slicelen;
1334 cur += step, i++) {
1335 PyObject* item = self->extra->children[cur];
1336 Py_INCREF(item);
1337 PyList_SET_ITEM(list, i, item);
1338 }
1339
1340 return list;
1341 }
1342 }
1343 else {
1344 PyErr_SetString(PyExc_TypeError,
1345 "element indices must be integers");
1346 return NULL;
1347 }
1348}
1349
1350static int
1351element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1352{
1353 ElementObject* self = (ElementObject*) self_;
1354
1355#if (PY_VERSION_HEX < 0x02050000)
1356 if (PyInt_Check(item) || PyLong_Check(item)) {
1357 long i = PyInt_AsLong(item);
1358#else
1359 if (PyIndex_Check(item)) {
1360 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1361#endif
1362
1363 if (i == -1 && PyErr_Occurred()) {
1364 return -1;
1365 }
1366 if (i < 0 && self->extra)
1367 i += self->extra->length;
1368 return element_setitem(self_, i, value);
1369 }
1370 else if (PySlice_Check(item)) {
1371 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1372
1373 PyObject* recycle = NULL;
1374 PyObject* seq = NULL;
1375
1376 if (!self->extra)
1377 element_new_extra(self, NULL);
1378
1379 if (PySlice_GetIndicesEx((PySliceObject *)item,
1380 self->extra->length,
1381 &start, &stop, &step, &slicelen) < 0) {
1382 return -1;
1383 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001384 assert(slicelen <= self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001385
1386 if (value == NULL)
1387 newlen = 0;
1388 else {
1389 seq = PySequence_Fast(value, "");
1390 if (!seq) {
1391 PyErr_Format(
1392 PyExc_TypeError,
1393 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1394 );
1395 return -1;
1396 }
1397 newlen = PySequence_Size(seq);
1398 }
1399
1400 if (step != 1 && newlen != slicelen)
1401 {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001402 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001403 PyErr_Format(PyExc_ValueError,
1404#if (PY_VERSION_HEX < 0x02050000)
1405 "attempt to assign sequence of size %d "
1406 "to extended slice of size %d",
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001407 (int)newlen, (int)slicelen
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001408#else
1409 "attempt to assign sequence of size %zd "
1410 "to extended slice of size %zd",
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001411 newlen, slicelen
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001412#endif
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001413 );
1414 return -1;
1415 }
1416
1417
1418 /* Resize before creating the recycle bin, to prevent refleaks. */
1419 if (newlen > slicelen) {
1420 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001421 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001422 return -1;
1423 }
1424 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001425 assert(newlen - slicelen <= INT_MAX - self->extra->length);
1426 assert(newlen - slicelen >= -self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001427
1428 if (slicelen > 0) {
1429 /* to avoid recursive calls to this method (via decref), move
1430 old items to the recycle bin here, and get rid of them when
1431 we're done modifying the element */
1432 recycle = PyList_New(slicelen);
1433 if (!recycle) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001434 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001435 return -1;
1436 }
1437 for (cur = start, i = 0; i < slicelen;
1438 cur += step, i++)
1439 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1440 }
1441
1442 if (newlen < slicelen) {
1443 /* delete slice */
1444 for (i = stop; i < self->extra->length; i++)
1445 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1446 } else if (newlen > slicelen) {
1447 /* insert slice */
1448 for (i = self->extra->length-1; i >= stop; i--)
1449 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1450 }
1451
1452 /* replace the slice */
1453 for (cur = start, i = 0; i < newlen;
1454 cur += step, i++) {
1455 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1456 Py_INCREF(element);
1457 self->extra->children[cur] = element;
1458 }
1459
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001460 self->extra->length += (int)(newlen - slicelen);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001461
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001462 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001463
1464 /* discard the recycle bin, and everything in it */
1465 Py_XDECREF(recycle);
1466
1467 return 0;
1468 }
1469 else {
1470 PyErr_SetString(PyExc_TypeError,
1471 "element indices must be integers");
1472 return -1;
1473 }
1474}
1475
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001476static PyMethodDef element_methods[] = {
1477
1478 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1479
1480 {"get", (PyCFunction) element_get, METH_VARARGS},
1481 {"set", (PyCFunction) element_set, METH_VARARGS},
1482
1483 {"find", (PyCFunction) element_find, METH_VARARGS},
1484 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1485 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1486
1487 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001488 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001489 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1490 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1491
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001492 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1493 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1494 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1495
1496 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001497 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1498
1499 {"items", (PyCFunction) element_items, METH_VARARGS},
1500 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1501
1502 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1503
1504 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1505 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1506
1507 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1508 C objects correctly, so we have to fake it using a __reduce__-
1509 based hack (see the element_reduce implementation above for
1510 details). */
1511
1512 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1513 using a runtime test to figure out if we need to fake things
1514 or now (see the init code below). The following entry is
1515 enabled only if the hack is needed. */
1516
1517 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1518
1519 {NULL, NULL}
1520};
1521
1522static PyObject*
1523element_getattr(ElementObject* self, char* name)
1524{
1525 PyObject* res;
1526
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001527 /* handle common attributes first */
1528 if (strcmp(name, "tag") == 0) {
1529 res = self->tag;
1530 Py_INCREF(res);
1531 return res;
1532 } else if (strcmp(name, "text") == 0) {
1533 res = element_get_text(self);
1534 Py_INCREF(res);
1535 return res;
1536 }
1537
1538 /* methods */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001539 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1540 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001541 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001542
1543 PyErr_Clear();
1544
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001545 /* less common attributes */
1546 if (strcmp(name, "tail") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001547 res = element_get_tail(self);
1548 } else if (strcmp(name, "attrib") == 0) {
1549 if (!self->extra)
1550 element_new_extra(self, NULL);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001551 res = element_get_attrib(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001552 } else {
1553 PyErr_SetString(PyExc_AttributeError, name);
1554 return NULL;
1555 }
1556
1557 if (!res)
1558 return NULL;
1559
1560 Py_INCREF(res);
1561 return res;
1562}
1563
1564static int
1565element_setattr(ElementObject* self, const char* name, PyObject* value)
1566{
1567 if (value == NULL) {
1568 PyErr_SetString(
1569 PyExc_AttributeError,
1570 "can't delete element attributes"
1571 );
1572 return -1;
1573 }
1574
1575 if (strcmp(name, "tag") == 0) {
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001576 Py_INCREF(value);
1577 Py_SETREF(self->tag, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001578 } else if (strcmp(name, "text") == 0) {
1579 Py_DECREF(JOIN_OBJ(self->text));
1580 self->text = value;
1581 Py_INCREF(self->text);
1582 } else if (strcmp(name, "tail") == 0) {
1583 Py_DECREF(JOIN_OBJ(self->tail));
1584 self->tail = value;
1585 Py_INCREF(self->tail);
1586 } else if (strcmp(name, "attrib") == 0) {
1587 if (!self->extra)
1588 element_new_extra(self, NULL);
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001589 Py_INCREF(value);
1590 Py_SETREF(self->extra->attrib, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001591 } else {
1592 PyErr_SetString(PyExc_AttributeError, name);
1593 return -1;
1594 }
1595
1596 return 0;
1597}
1598
1599static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001600 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001601 0, /* sq_concat */
1602 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001603 element_getitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001604 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001605 element_setitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001606 0,
1607};
1608
1609static PyMappingMethods element_as_mapping = {
1610 (lenfunc) element_length,
1611 (binaryfunc) element_subscr,
1612 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001613};
1614
1615statichere PyTypeObject Element_Type = {
1616 PyObject_HEAD_INIT(NULL)
1617 0, "Element", sizeof(ElementObject), 0,
1618 /* methods */
1619 (destructor)element_dealloc, /* tp_dealloc */
1620 0, /* tp_print */
1621 (getattrfunc)element_getattr, /* tp_getattr */
1622 (setattrfunc)element_setattr, /* tp_setattr */
1623 0, /* tp_compare */
1624 (reprfunc)element_repr, /* tp_repr */
1625 0, /* tp_as_number */
1626 &element_as_sequence, /* tp_as_sequence */
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001627 &element_as_mapping, /* tp_as_mapping */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001628};
1629
1630/* ==================================================================== */
1631/* the tree builder type */
1632
1633typedef struct {
1634 PyObject_HEAD
1635
1636 PyObject* root; /* root node (first created node) */
1637
1638 ElementObject* this; /* current node */
1639 ElementObject* last; /* most recently created node */
1640
1641 PyObject* data; /* data collector (string or list), or NULL */
1642
1643 PyObject* stack; /* element stack */
Neal Norwitzc7074382006-06-12 02:06:17 +00001644 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001645
1646 /* element tracing */
1647 PyObject* events; /* list of events, or NULL if not collecting */
1648 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1649 PyObject* end_event_obj;
1650 PyObject* start_ns_event_obj;
1651 PyObject* end_ns_event_obj;
1652
1653} TreeBuilderObject;
1654
1655staticforward PyTypeObject TreeBuilder_Type;
1656
Christian Heimese93237d2007-12-19 02:37:44 +00001657#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001658
1659/* -------------------------------------------------------------------- */
1660/* constructor and destructor */
1661
1662LOCAL(PyObject*)
1663treebuilder_new(void)
1664{
1665 TreeBuilderObject* self;
1666
1667 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1668 if (self == NULL)
1669 return NULL;
1670
1671 self->root = NULL;
1672
1673 Py_INCREF(Py_None);
1674 self->this = (ElementObject*) Py_None;
1675
1676 Py_INCREF(Py_None);
1677 self->last = (ElementObject*) Py_None;
1678
1679 self->data = NULL;
1680
1681 self->stack = PyList_New(20);
1682 self->index = 0;
1683
1684 self->events = NULL;
1685 self->start_event_obj = self->end_event_obj = NULL;
1686 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1687
1688 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1689
1690 return (PyObject*) self;
1691}
1692
1693static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00001694treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001695{
1696 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1697 return NULL;
1698
1699 return treebuilder_new();
1700}
1701
1702static void
1703treebuilder_dealloc(TreeBuilderObject* self)
1704{
1705 Py_XDECREF(self->end_ns_event_obj);
1706 Py_XDECREF(self->start_ns_event_obj);
1707 Py_XDECREF(self->end_event_obj);
1708 Py_XDECREF(self->start_event_obj);
1709 Py_XDECREF(self->events);
1710 Py_DECREF(self->stack);
1711 Py_XDECREF(self->data);
1712 Py_DECREF(self->last);
1713 Py_DECREF(self->this);
1714 Py_XDECREF(self->root);
1715
1716 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1717
1718 PyObject_Del(self);
1719}
1720
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001721LOCAL(int)
1722treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
1723 PyObject *node)
1724{
1725 if (action != NULL) {
1726 PyObject *res = PyTuple_Pack(2, action, node);
1727 if (res == NULL)
1728 return -1;
1729 if (PyList_Append(self->events, res) < 0) {
1730 Py_DECREF(res);
1731 return -1;
1732 }
1733 Py_DECREF(res);
1734 }
1735 return 0;
1736}
1737
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001738/* -------------------------------------------------------------------- */
1739/* handlers */
1740
1741LOCAL(PyObject*)
1742treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1743 PyObject* standalone)
1744{
1745 Py_RETURN_NONE;
1746}
1747
1748LOCAL(PyObject*)
1749treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1750 PyObject* attrib)
1751{
1752 PyObject* node;
1753 PyObject* this;
1754
1755 if (self->data) {
1756 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001757 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001758 self->last->text = JOIN_SET(
1759 self->data, PyList_CheckExact(self->data)
1760 );
1761 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001762 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001763 self->last->tail = JOIN_SET(
1764 self->data, PyList_CheckExact(self->data)
1765 );
1766 }
1767 self->data = NULL;
1768 }
1769
1770 node = element_new(tag, attrib);
1771 if (!node)
1772 return NULL;
1773
1774 this = (PyObject*) self->this;
1775
1776 if (this != Py_None) {
1777 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001778 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001779 } else {
1780 if (self->root) {
1781 PyErr_SetString(
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001782 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001783 "multiple elements on top level"
1784 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001785 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001786 }
1787 Py_INCREF(node);
1788 self->root = node;
1789 }
1790
1791 if (self->index < PyList_GET_SIZE(self->stack)) {
1792 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001793 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001794 Py_INCREF(this);
1795 } else {
1796 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001797 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001798 }
1799 self->index++;
1800
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001801 Py_INCREF(node);
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001802 Py_SETREF(self->this, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001803
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001804 Py_INCREF(node);
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001805 Py_SETREF(self->last, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001806
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001807 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
1808 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001809
1810 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001811
1812 error:
1813 Py_DECREF(node);
1814 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001815}
1816
1817LOCAL(PyObject*)
1818treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1819{
1820 if (!self->data) {
Fredrik Lundhdc075b92006-08-16 16:47:07 +00001821 if (self->last == (ElementObject*) Py_None) {
1822 /* ignore calls to data before the first call to start */
1823 Py_RETURN_NONE;
1824 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001825 /* store the first item as is */
1826 Py_INCREF(data); self->data = data;
1827 } else {
1828 /* more than one item; use a list to collect items */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001829 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1830 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001831 /* expat often generates single character data sections; handle
1832 the most common case by resizing the existing string... */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001833 Py_ssize_t size = PyString_GET_SIZE(self->data);
1834 if (_PyString_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001835 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001836 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001837 } else if (PyList_CheckExact(self->data)) {
1838 if (PyList_Append(self->data, data) < 0)
1839 return NULL;
1840 } else {
1841 PyObject* list = PyList_New(2);
1842 if (!list)
1843 return NULL;
1844 PyList_SET_ITEM(list, 0, self->data);
1845 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1846 self->data = list;
1847 }
1848 }
1849
1850 Py_RETURN_NONE;
1851}
1852
1853LOCAL(PyObject*)
1854treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1855{
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001856 ElementObject *item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001857
1858 if (self->data) {
1859 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001860 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001861 self->last->text = JOIN_SET(
1862 self->data, PyList_CheckExact(self->data)
1863 );
1864 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001865 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001866 self->last->tail = JOIN_SET(
1867 self->data, PyList_CheckExact(self->data)
1868 );
1869 }
1870 self->data = NULL;
1871 }
1872
1873 if (self->index == 0) {
1874 PyErr_SetString(
1875 PyExc_IndexError,
1876 "pop from empty stack"
1877 );
1878 return NULL;
1879 }
1880
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001881 item = self->last;
1882 self->last = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001883 self->index--;
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001884 self->this = (ElementObject *) PyList_GET_ITEM(self->stack, self->index);
1885 Py_INCREF(self->this);
1886 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001887
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001888 if (treebuilder_append_event(self, self->end_event_obj, (PyObject*)self->last) < 0)
1889 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001890
1891 Py_INCREF(self->last);
1892 return (PyObject*) self->last;
1893}
1894
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001895/* -------------------------------------------------------------------- */
1896/* methods (in alphabetical order) */
1897
1898static PyObject*
1899treebuilder_data(TreeBuilderObject* self, PyObject* args)
1900{
1901 PyObject* data;
1902 if (!PyArg_ParseTuple(args, "O:data", &data))
1903 return NULL;
1904
1905 return treebuilder_handle_data(self, data);
1906}
1907
1908static PyObject*
1909treebuilder_end(TreeBuilderObject* self, PyObject* args)
1910{
1911 PyObject* tag;
1912 if (!PyArg_ParseTuple(args, "O:end", &tag))
1913 return NULL;
1914
1915 return treebuilder_handle_end(self, tag);
1916}
1917
1918LOCAL(PyObject*)
1919treebuilder_done(TreeBuilderObject* self)
1920{
1921 PyObject* res;
1922
1923 /* FIXME: check stack size? */
1924
1925 if (self->root)
1926 res = self->root;
1927 else
1928 res = Py_None;
1929
1930 Py_INCREF(res);
1931 return res;
1932}
1933
1934static PyObject*
1935treebuilder_close(TreeBuilderObject* self, PyObject* args)
1936{
1937 if (!PyArg_ParseTuple(args, ":close"))
1938 return NULL;
1939
1940 return treebuilder_done(self);
1941}
1942
1943static PyObject*
1944treebuilder_start(TreeBuilderObject* self, PyObject* args)
1945{
1946 PyObject* tag;
1947 PyObject* attrib = Py_None;
1948 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1949 return NULL;
1950
1951 return treebuilder_handle_start(self, tag, attrib);
1952}
1953
1954static PyObject*
1955treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1956{
1957 PyObject* encoding;
1958 PyObject* standalone;
1959 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1960 return NULL;
1961
1962 return treebuilder_handle_xml(self, encoding, standalone);
1963}
1964
1965static PyMethodDef treebuilder_methods[] = {
1966 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1967 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1968 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1969 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1970 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1971 {NULL, NULL}
1972};
1973
1974static PyObject*
1975treebuilder_getattr(TreeBuilderObject* self, char* name)
1976{
1977 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1978}
1979
1980statichere PyTypeObject TreeBuilder_Type = {
1981 PyObject_HEAD_INIT(NULL)
1982 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
1983 /* methods */
1984 (destructor)treebuilder_dealloc, /* tp_dealloc */
1985 0, /* tp_print */
1986 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1987};
1988
1989/* ==================================================================== */
1990/* the expat interface */
1991
1992#if defined(USE_EXPAT)
1993
1994#include "expat.h"
1995
#if defined(USE_PYEXPAT_CAPI)
/* reach expat through the expat_capi function table
   (struct PyExpat_CAPI, declared in pyexpat.h) */
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
/* call the XML_-prefixed expat functions directly */
#define EXPAT(func) (XML_##func)
#endif
2003
2004typedef struct {
2005 PyObject_HEAD
2006
2007 XML_Parser parser;
2008
2009 PyObject* target;
2010 PyObject* entity;
2011
2012 PyObject* names;
2013
2014 PyObject* handle_xml;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002015
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002016 PyObject* handle_start;
2017 PyObject* handle_data;
2018 PyObject* handle_end;
2019
2020 PyObject* handle_comment;
2021 PyObject* handle_pi;
2022
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002023 PyObject* handle_close;
2024
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002025} XMLParserObject;
2026
2027staticforward PyTypeObject XMLParser_Type;
2028
2029/* helpers */
2030
#if defined(Py_USING_UNICODE)
/* Return 1 if any of the first 'size' bytes of 'string' has its high
   bit set (i.e. the data is not plain 7-bit ASCII), otherwise 0. */
LOCAL(int)
checkstring(const char *string, int size)
{
    int pos = 0;

    while (pos < size) {
        if (string[pos] & 0x80)
            return 1;
        pos++;
    }

    return 0;
}
#endif
2045
2046LOCAL(PyObject*)
2047makestring(const char* string, int size)
2048{
2049 /* convert a UTF-8 string to either a 7-bit ascii string or a
2050 Unicode string */
2051
2052#if defined(Py_USING_UNICODE)
2053 if (checkstring(string, size))
2054 return PyUnicode_DecodeUTF8(string, size, "strict");
2055#endif
2056
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002057 return PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002058}
2059
2060LOCAL(PyObject*)
2061makeuniversal(XMLParserObject* self, const char* string)
2062{
2063 /* convert a UTF-8 tag/attribute name from the expat parser
2064 to a universal name string */
2065
2066 int size = strlen(string);
2067 PyObject* key;
2068 PyObject* value;
2069
2070 /* look the 'raw' name up in the names dictionary */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002071 key = PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002072 if (!key)
2073 return NULL;
2074
2075 value = PyDict_GetItem(self->names, key);
2076
2077 if (value) {
2078 Py_INCREF(value);
2079 } else {
2080 /* new name. convert to universal name, and decode as
2081 necessary */
2082
2083 PyObject* tag;
2084 char* p;
2085 int i;
2086
2087 /* look for namespace separator */
2088 for (i = 0; i < size; i++)
2089 if (string[i] == '}')
2090 break;
2091 if (i != size) {
2092 /* convert to universal name */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002093 tag = PyString_FromStringAndSize(NULL, size+1);
2094 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002095 p[0] = '{';
2096 memcpy(p+1, string, size);
2097 size++;
2098 } else {
2099 /* plain name; use key as tag */
2100 Py_INCREF(key);
2101 tag = key;
2102 }
2103
2104 /* decode universal name */
2105#if defined(Py_USING_UNICODE)
2106 /* inline makestring, to avoid duplicating the source string if
2107 it's not an utf-8 string */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002108 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002109 if (checkstring(p, size)) {
2110 value = PyUnicode_DecodeUTF8(p, size, "strict");
2111 Py_DECREF(tag);
2112 if (!value) {
2113 Py_DECREF(key);
2114 return NULL;
2115 }
2116 } else
2117#endif
2118 value = tag; /* use tag as is */
2119
2120 /* add to names dictionary */
2121 if (PyDict_SetItem(self->names, key, value) < 0) {
2122 Py_DECREF(key);
2123 Py_DECREF(value);
2124 return NULL;
2125 }
2126 }
2127
2128 Py_DECREF(key);
2129 return value;
2130}
2131
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002132static void
2133expat_set_error(const char* message, int line, int column)
2134{
2135 PyObject *error;
2136 PyObject *position;
2137 char buffer[256];
2138
2139 sprintf(buffer, "%s: line %d, column %d", message, line, column);
2140
2141 error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
2142 if (!error)
2143 return;
2144
2145 /* add position attribute */
2146 position = Py_BuildValue("(ii)", line, column);
2147 if (!position) {
2148 Py_DECREF(error);
2149 return;
2150 }
2151 if (PyObject_SetAttrString(error, "position", position) == -1) {
2152 Py_DECREF(error);
2153 Py_DECREF(position);
2154 return;
2155 }
2156 Py_DECREF(position);
2157
2158 PyErr_SetObject(elementtree_parseerror_obj, error);
2159 Py_DECREF(error);
2160}
2161
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002162/* -------------------------------------------------------------------- */
2163/* handlers */
2164
2165static void
2166expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2167 int data_len)
2168{
2169 PyObject* key;
2170 PyObject* value;
2171 PyObject* res;
2172
2173 if (data_len < 2 || data_in[0] != '&')
2174 return;
2175
2176 key = makestring(data_in + 1, data_len - 2);
2177 if (!key)
2178 return;
2179
2180 value = PyDict_GetItem(self->entity, key);
2181
2182 if (value) {
2183 if (TreeBuilder_CheckExact(self->target))
2184 res = treebuilder_handle_data(
2185 (TreeBuilderObject*) self->target, value
2186 );
2187 else if (self->handle_data)
2188 res = PyObject_CallFunction(self->handle_data, "O", value);
2189 else
2190 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002191 Py_XDECREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002192 } else if (!PyErr_Occurred()) {
2193 /* Report the first error, not the last */
2194 char message[128];
2195 sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key));
2196 expat_set_error(
2197 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002198 EXPAT(GetErrorLineNumber)(self->parser),
2199 EXPAT(GetErrorColumnNumber)(self->parser)
2200 );
2201 }
2202
2203 Py_DECREF(key);
2204}
2205
2206static void
2207expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2208 const XML_Char **attrib_in)
2209{
2210 PyObject* res;
2211 PyObject* tag;
2212 PyObject* attrib;
2213 int ok;
2214
2215 /* tag name */
2216 tag = makeuniversal(self, tag_in);
2217 if (!tag)
2218 return; /* parser will look for errors */
2219
2220 /* attributes */
2221 if (attrib_in[0]) {
2222 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002223 if (!attrib) {
2224 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002225 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002226 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002227 while (attrib_in[0] && attrib_in[1]) {
2228 PyObject* key = makeuniversal(self, attrib_in[0]);
2229 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2230 if (!key || !value) {
2231 Py_XDECREF(value);
2232 Py_XDECREF(key);
2233 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002234 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002235 return;
2236 }
2237 ok = PyDict_SetItem(attrib, key, value);
2238 Py_DECREF(value);
2239 Py_DECREF(key);
2240 if (ok < 0) {
2241 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002242 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002243 return;
2244 }
2245 attrib_in += 2;
2246 }
2247 } else {
2248 Py_INCREF(Py_None);
2249 attrib = Py_None;
2250 }
2251
2252 if (TreeBuilder_CheckExact(self->target))
2253 /* shortcut */
2254 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2255 tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002256 else if (self->handle_start) {
2257 if (attrib == Py_None) {
2258 Py_DECREF(attrib);
2259 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002260 if (!attrib) {
2261 Py_DECREF(tag);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002262 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002263 }
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002264 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002265 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002266 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002267 res = NULL;
2268
2269 Py_DECREF(tag);
2270 Py_DECREF(attrib);
2271
2272 Py_XDECREF(res);
2273}
2274
2275static void
2276expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2277 int data_len)
2278{
2279 PyObject* data;
2280 PyObject* res;
2281
2282 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002283 if (!data)
2284 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002285
2286 if (TreeBuilder_CheckExact(self->target))
2287 /* shortcut */
2288 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2289 else if (self->handle_data)
2290 res = PyObject_CallFunction(self->handle_data, "O", data);
2291 else
2292 res = NULL;
2293
2294 Py_DECREF(data);
2295
2296 Py_XDECREF(res);
2297}
2298
2299static void
2300expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2301{
2302 PyObject* tag;
2303 PyObject* res = NULL;
2304
2305 if (TreeBuilder_CheckExact(self->target))
2306 /* shortcut */
2307 /* the standard tree builder doesn't look at the end tag */
2308 res = treebuilder_handle_end(
2309 (TreeBuilderObject*) self->target, Py_None
2310 );
2311 else if (self->handle_end) {
2312 tag = makeuniversal(self, tag_in);
2313 if (tag) {
2314 res = PyObject_CallFunction(self->handle_end, "O", tag);
2315 Py_DECREF(tag);
2316 }
2317 }
2318
2319 Py_XDECREF(res);
2320}
2321
2322static void
2323expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2324 const XML_Char *uri)
2325{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002326 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2327 PyObject *parcel;
2328 PyObject *sprefix = NULL;
2329 PyObject *suri = NULL;
2330
2331 if (PyErr_Occurred())
2332 return;
2333
2334 if (!target->events || !target->start_ns_event_obj)
2335 return;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002336
Eli Benderskyf933e082013-11-28 06:25:45 -08002337 if (uri)
Eli Bendersky71142c42013-11-28 06:37:25 -08002338 suri = makestring(uri, strlen(uri));
Eli Benderskyf933e082013-11-28 06:25:45 -08002339 else
Eli Bendersky71142c42013-11-28 06:37:25 -08002340 suri = PyString_FromStringAndSize("", 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002341 if (!suri)
2342 return;
2343
2344 if (prefix)
2345 sprefix = makestring(prefix, strlen(prefix));
2346 else
2347 sprefix = PyString_FromStringAndSize("", 0);
2348 if (!sprefix) {
2349 Py_DECREF(suri);
2350 return;
2351 }
2352
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002353 parcel = PyTuple_Pack(2, sprefix, suri);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002354 Py_DECREF(sprefix);
2355 Py_DECREF(suri);
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002356 if (!parcel)
2357 return;
2358 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
2359 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002360}
2361
2362static void
2363expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2364{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002365 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2366
2367 if (PyErr_Occurred())
2368 return;
2369
2370 if (!target->events)
2371 return;
2372
2373 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002374}
2375
2376static void
2377expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2378{
2379 PyObject* comment;
2380 PyObject* res;
2381
2382 if (self->handle_comment) {
2383 comment = makestring(comment_in, strlen(comment_in));
2384 if (comment) {
2385 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2386 Py_XDECREF(res);
2387 Py_DECREF(comment);
2388 }
2389 }
2390}
2391
2392static void
2393expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2394 const XML_Char* data_in)
2395{
2396 PyObject* target;
2397 PyObject* data;
2398 PyObject* res;
2399
2400 if (self->handle_pi) {
2401 target = makestring(target_in, strlen(target_in));
2402 data = makestring(data_in, strlen(data_in));
2403 if (target && data) {
2404 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2405 Py_XDECREF(res);
2406 Py_DECREF(data);
2407 Py_DECREF(target);
2408 } else {
2409 Py_XDECREF(data);
2410 Py_XDECREF(target);
2411 }
2412 }
2413}
2414
#if defined(Py_USING_UNICODE)
/* expat unknown-encoding callback.

   Decodes the 256 byte values 0..255 with the codec called 'name' and
   fills info->map from the result; bytes that decode to the Unicode
   replacement character are stored as -1.  A codec that does not yield
   exactly 256 characters is rejected with ValueError.  Returns
   XML_STATUS_OK on success and XML_STATUS_ERROR otherwise. */
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    unsigned char bytes[256];
    PyObject *decoded;
    Py_UNICODE *chars;
    int code;

    memset(info, 0, sizeof(XML_Encoding));

    for (code = 0; code < 256; code++)
        bytes[code] = code;

    decoded = PyUnicode_Decode((char *) bytes, 256, name, "replace");
    if (decoded == NULL)
        return XML_STATUS_ERROR;

    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        PyErr_SetString(PyExc_ValueError,
                        "multi-byte encodings are not supported");
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (code = 0; code < 256; code++) {
        if (chars[code] == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[code] = -1;
        else
            info->map[code] = chars[code];
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2455
2456/* -------------------------------------------------------------------- */
2457/* constructor and destructor */
2458
2459static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00002460xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002461{
2462 XMLParserObject* self;
2463 /* FIXME: does this need to be static? */
2464 static XML_Memory_Handling_Suite memory_handler;
2465
2466 PyObject* target = NULL;
2467 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002468 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002469 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2470 &target, &encoding))
2471 return NULL;
2472
2473#if defined(USE_PYEXPAT_CAPI)
2474 if (!expat_capi) {
2475 PyErr_SetString(
2476 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2477 );
2478 return NULL;
2479 }
2480#endif
2481
2482 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2483 if (self == NULL)
2484 return NULL;
2485
2486 self->entity = PyDict_New();
2487 if (!self->entity) {
2488 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002489 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002490 }
2491
2492 self->names = PyDict_New();
2493 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002494 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002495 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002496 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002497 }
2498
2499 memory_handler.malloc_fcn = PyObject_Malloc;
2500 memory_handler.realloc_fcn = PyObject_Realloc;
2501 memory_handler.free_fcn = PyObject_Free;
2502
2503 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2504 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002505 PyObject_Del(self->names);
2506 PyObject_Del(self->entity);
2507 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002508 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002509 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002510 }
2511
2512 /* setup target handlers */
2513 if (!target) {
2514 target = treebuilder_new();
2515 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002516 EXPAT(ParserFree)(self->parser);
2517 PyObject_Del(self->names);
2518 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002520 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521 }
2522 } else
2523 Py_INCREF(target);
2524 self->target = target;
2525
2526 self->handle_xml = PyObject_GetAttrString(target, "xml");
2527 self->handle_start = PyObject_GetAttrString(target, "start");
2528 self->handle_data = PyObject_GetAttrString(target, "data");
2529 self->handle_end = PyObject_GetAttrString(target, "end");
2530 self->handle_comment = PyObject_GetAttrString(target, "comment");
2531 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002532 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002533
2534 PyErr_Clear();
2535
2536 /* configure parser */
2537 EXPAT(SetUserData)(self->parser, self);
2538 EXPAT(SetElementHandler)(
2539 self->parser,
2540 (XML_StartElementHandler) expat_start_handler,
2541 (XML_EndElementHandler) expat_end_handler
2542 );
2543 EXPAT(SetDefaultHandlerExpand)(
2544 self->parser,
2545 (XML_DefaultHandler) expat_default_handler
2546 );
2547 EXPAT(SetCharacterDataHandler)(
2548 self->parser,
2549 (XML_CharacterDataHandler) expat_data_handler
2550 );
2551 if (self->handle_comment)
2552 EXPAT(SetCommentHandler)(
2553 self->parser,
2554 (XML_CommentHandler) expat_comment_handler
2555 );
2556 if (self->handle_pi)
2557 EXPAT(SetProcessingInstructionHandler)(
2558 self->parser,
2559 (XML_ProcessingInstructionHandler) expat_pi_handler
2560 );
2561#if defined(Py_USING_UNICODE)
2562 EXPAT(SetUnknownEncodingHandler)(
2563 self->parser,
2564 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2565 );
2566#endif
2567
2568 ALLOC(sizeof(XMLParserObject), "create expatparser");
2569
2570 return (PyObject*) self;
2571}
2572
2573static void
2574xmlparser_dealloc(XMLParserObject* self)
2575{
2576 EXPAT(ParserFree)(self->parser);
2577
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002578 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002579 Py_XDECREF(self->handle_pi);
2580 Py_XDECREF(self->handle_comment);
2581 Py_XDECREF(self->handle_end);
2582 Py_XDECREF(self->handle_data);
2583 Py_XDECREF(self->handle_start);
2584 Py_XDECREF(self->handle_xml);
2585
2586 Py_DECREF(self->target);
2587 Py_DECREF(self->entity);
2588 Py_DECREF(self->names);
2589
2590 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2591
2592 PyObject_Del(self);
2593}
2594
2595/* -------------------------------------------------------------------- */
2596/* methods (in alphabetical order) */
2597
2598LOCAL(PyObject*)
2599expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2600{
2601 int ok;
2602
2603 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2604
2605 if (PyErr_Occurred())
2606 return NULL;
2607
2608 if (!ok) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002609 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2611 EXPAT(GetErrorLineNumber)(self->parser),
2612 EXPAT(GetErrorColumnNumber)(self->parser)
2613 );
2614 return NULL;
2615 }
2616
2617 Py_RETURN_NONE;
2618}
2619
2620static PyObject*
2621xmlparser_close(XMLParserObject* self, PyObject* args)
2622{
2623 /* end feeding data to parser */
2624
2625 PyObject* res;
2626 if (!PyArg_ParseTuple(args, ":close"))
2627 return NULL;
2628
2629 res = expat_parse(self, "", 0, 1);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002630 if (!res)
2631 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002632
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002633 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002634 Py_DECREF(res);
2635 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002636 } if (self->handle_close) {
2637 Py_DECREF(res);
2638 return PyObject_CallFunction(self->handle_close, "");
2639 } else
2640 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002641}
2642
2643static PyObject*
2644xmlparser_feed(XMLParserObject* self, PyObject* args)
2645{
2646 /* feed data to parser */
2647
2648 char* data;
2649 int data_len;
2650 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2651 return NULL;
2652
2653 return expat_parse(self, data, data_len, 0);
2654}
2655
2656static PyObject*
2657xmlparser_parse(XMLParserObject* self, PyObject* args)
2658{
2659 /* (internal) parse until end of input stream */
2660
2661 PyObject* reader;
2662 PyObject* buffer;
2663 PyObject* res;
2664
2665 PyObject* fileobj;
2666 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2667 return NULL;
2668
2669 reader = PyObject_GetAttrString(fileobj, "read");
2670 if (!reader)
2671 return NULL;
2672
2673 /* read from open file object */
2674 for (;;) {
2675
2676 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2677
2678 if (!buffer) {
2679 /* read failed (e.g. due to KeyboardInterrupt) */
2680 Py_DECREF(reader);
2681 return NULL;
2682 }
2683
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002684 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002685 Py_DECREF(buffer);
2686 break;
2687 }
2688
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002689 if (PyString_GET_SIZE(buffer) > INT_MAX) {
2690 Py_DECREF(buffer);
2691 Py_DECREF(reader);
2692 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
2693 return NULL;
2694 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002695 res = expat_parse(
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002696 self, PyString_AS_STRING(buffer), (int)PyString_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002697 );
2698
2699 Py_DECREF(buffer);
2700
2701 if (!res) {
2702 Py_DECREF(reader);
2703 return NULL;
2704 }
2705 Py_DECREF(res);
2706
2707 }
2708
2709 Py_DECREF(reader);
2710
2711 res = expat_parse(self, "", 0, 1);
2712
2713 if (res && TreeBuilder_CheckExact(self->target)) {
2714 Py_DECREF(res);
2715 return treebuilder_done((TreeBuilderObject*) self->target);
2716 }
2717
2718 return res;
2719}
2720
2721static PyObject*
2722xmlparser_setevents(XMLParserObject* self, PyObject* args)
2723{
2724 /* activate element event reporting */
2725
Neal Norwitzc7074382006-06-12 02:06:17 +00002726 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002727 TreeBuilderObject* target;
2728
2729 PyObject* events; /* event collector */
2730 PyObject* event_set = Py_None;
2731 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2732 &event_set))
2733 return NULL;
2734
2735 if (!TreeBuilder_CheckExact(self->target)) {
2736 PyErr_SetString(
2737 PyExc_TypeError,
2738 "event handling only supported for cElementTree.Treebuilder "
2739 "targets"
2740 );
2741 return NULL;
2742 }
2743
2744 target = (TreeBuilderObject*) self->target;
2745
2746 Py_INCREF(events);
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002747 Py_SETREF(target->events, events);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002748
2749 /* clear out existing events */
Serhiy Storchaka98a97222014-02-09 13:14:04 +02002750 Py_CLEAR(target->start_event_obj);
2751 Py_CLEAR(target->end_event_obj);
2752 Py_CLEAR(target->start_ns_event_obj);
2753 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002754
2755 if (event_set == Py_None) {
2756 /* default is "end" only */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002757 target->end_event_obj = PyString_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002758 Py_RETURN_NONE;
2759 }
2760
2761 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2762 goto error;
2763
2764 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2765 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2766 char* event;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002767 if (!PyString_Check(item))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002768 goto error;
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002769 Py_INCREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002770 event = PyString_AS_STRING(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002771 if (strcmp(event, "start") == 0) {
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002772 Py_SETREF(target->start_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002773 } else if (strcmp(event, "end") == 0) {
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002774 Py_SETREF(target->end_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002775 } else if (strcmp(event, "start-ns") == 0) {
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002776 Py_SETREF(target->start_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002777 EXPAT(SetNamespaceDeclHandler)(
2778 self->parser,
2779 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2780 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2781 );
2782 } else if (strcmp(event, "end-ns") == 0) {
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002783 Py_SETREF(target->end_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002784 EXPAT(SetNamespaceDeclHandler)(
2785 self->parser,
2786 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2787 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2788 );
2789 } else {
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002790 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002791 PyErr_Format(
2792 PyExc_ValueError,
2793 "unknown event '%s'", event
2794 );
2795 return NULL;
2796 }
2797 }
2798
2799 Py_RETURN_NONE;
2800
2801 error:
2802 PyErr_SetString(
2803 PyExc_TypeError,
2804 "invalid event tuple"
2805 );
2806 return NULL;
2807}
2808
2809static PyMethodDef xmlparser_methods[] = {
2810 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2811 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2812 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2813 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2814 {NULL, NULL}
2815};
2816
2817static PyObject*
2818xmlparser_getattr(XMLParserObject* self, char* name)
2819{
2820 PyObject* res;
2821
2822 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2823 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002824 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002825
2826 PyErr_Clear();
2827
2828 if (strcmp(name, "entity") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002829 res = self->entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002830 else if (strcmp(name, "target") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002831 res = self->target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002832 else if (strcmp(name, "version") == 0) {
2833 char buffer[100];
2834 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2835 XML_MINOR_VERSION, XML_MICRO_VERSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002836 return PyString_FromString(buffer);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002837 } else {
2838 PyErr_SetString(PyExc_AttributeError, name);
2839 return NULL;
2840 }
2841
2842 Py_INCREF(res);
2843 return res;
2844}
2845
2846statichere PyTypeObject XMLParser_Type = {
2847 PyObject_HEAD_INIT(NULL)
2848 0, "XMLParser", sizeof(XMLParserObject), 0,
2849 /* methods */
2850 (destructor)xmlparser_dealloc, /* tp_dealloc */
2851 0, /* tp_print */
2852 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2853};
2854
2855#endif
2856
2857/* ==================================================================== */
2858/* python module interface */
2859
2860static PyMethodDef _functions[] = {
2861 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2862 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2863 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2864#if defined(USE_EXPAT)
2865 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2866 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2867#endif
2868 {NULL, NULL}
2869};
2870
2871DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002872init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002873{
2874 PyObject* m;
2875 PyObject* g;
2876 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002877
2878 /* Patch object type */
Christian Heimese93237d2007-12-19 02:37:44 +00002879 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002880#if defined(USE_EXPAT)
Christian Heimese93237d2007-12-19 02:37:44 +00002881 Py_TYPE(&XMLParser_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002882#endif
2883
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002884 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002885 if (!m)
2886 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002887
2888 /* python glue code */
2889
2890 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002891 if (!g)
2892 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002893
2894 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2895
2896 bootstrap = (
2897
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002898 "from copy import copy, deepcopy\n"
2899
2900 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002901 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002902 "except ImportError:\n"
2903 " import ElementTree\n"
2904 "ET = ElementTree\n"
2905 "del ElementTree\n"
2906
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002907 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002908
2909 "try:\n" /* check if copy works as is */
2910 " copy(cElementTree.Element('x'))\n"
2911 "except:\n"
2912 " def copyelement(elem):\n"
2913 " return elem\n"
2914
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002915 "class CommentProxy:\n"
2916 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002917 " element = cElementTree.Element(ET.Comment)\n"
2918 " element.text = text\n"
2919 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002920 " def __cmp__(self, other):\n"
2921 " return cmp(ET.Comment, other)\n"
2922 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002923
2924 "class ElementTree(ET.ElementTree):\n" /* public */
2925 " def parse(self, source, parser=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002926 " close_source = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002927 " if not hasattr(source, 'read'):\n"
2928 " source = open(source, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002929 " close_source = False\n"
2930 " try:\n"
2931 " if parser is not None:\n"
2932 " while 1:\n"
2933 " data = source.read(65536)\n"
2934 " if not data:\n"
2935 " break\n"
2936 " parser.feed(data)\n"
2937 " self._root = parser.close()\n"
2938 " else:\n"
2939 " parser = cElementTree.XMLParser()\n"
2940 " self._root = parser._parse(source)\n"
2941 " return self._root\n"
2942 " finally:\n"
2943 " if close_source:\n"
2944 " source.close()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002945 "cElementTree.ElementTree = ElementTree\n"
2946
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002947 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002948 " if tag == '*':\n"
2949 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002950 " if tag is None or node.tag == tag:\n"
2951 " yield node\n"
2952 " for node in node:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002953 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002954 " yield node\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002955
2956 "def itertext(node):\n" /* helper */
2957 " if node.text:\n"
2958 " yield node.text\n"
2959 " for e in node:\n"
2960 " for s in e.itertext():\n"
2961 " yield s\n"
2962 " if e.tail:\n"
2963 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002964
2965 "def parse(source, parser=None):\n" /* public */
2966 " tree = ElementTree()\n"
2967 " tree.parse(source, parser)\n"
2968 " return tree\n"
2969 "cElementTree.parse = parse\n"
2970
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002971 "class iterparse(object):\n"
2972 " root = None\n"
2973 " def __init__(self, file, events=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002974 " self._close_file = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002975 " if not hasattr(file, 'read'):\n"
2976 " file = open(file, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002977 " self._close_file = True\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002978 " self._file = file\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002979 " self._events = []\n"
2980 " self._index = 0\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01002981 " self._error = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002982 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002983 " b = cElementTree.TreeBuilder()\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002984 " self._parser = cElementTree.XMLParser(b)\n"
2985 " self._parser._setevents(self._events, events)\n"
2986 " def next(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002987 " while 1:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002988 " try:\n"
2989 " item = self._events[self._index]\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01002990 " self._index += 1\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002991 " return item\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01002992 " except IndexError:\n"
2993 " pass\n"
2994 " if self._error:\n"
2995 " e = self._error\n"
2996 " self._error = None\n"
2997 " raise e\n"
2998 " if self._parser is None:\n"
2999 " self.root = self._root\n"
3000 " if self._close_file:\n"
3001 " self._file.close()\n"
3002 " raise StopIteration\n"
3003 " # load event buffer\n"
3004 " del self._events[:]\n"
3005 " self._index = 0\n"
3006 " data = self._file.read(16384)\n"
3007 " if data:\n"
3008 " try:\n"
3009 " self._parser.feed(data)\n"
3010 " except SyntaxError as exc:\n"
3011 " self._error = exc\n"
3012 " else:\n"
3013 " self._root = self._parser.close()\n"
3014 " self._parser = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003015 " def __iter__(self):\n"
3016 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003017 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003018
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003019 "class PIProxy:\n"
3020 " def __call__(self, target, text=None):\n"
3021 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003022 " element.text = target\n"
3023 " if text:\n"
3024 " element.text = element.text + ' ' + text\n"
3025 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003026 " def __cmp__(self, other):\n"
3027 " return cmp(ET.PI, other)\n"
3028 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003029
3030 "def XML(text):\n" /* public */
3031 " parser = cElementTree.XMLParser()\n"
3032 " parser.feed(text)\n"
3033 " return parser.close()\n"
3034 "cElementTree.XML = cElementTree.fromstring = XML\n"
3035
3036 "def XMLID(text):\n" /* public */
3037 " tree = XML(text)\n"
3038 " ids = {}\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003039 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003040 " id = elem.get('id')\n"
3041 " if id:\n"
3042 " ids[id] = elem\n"
3043 " return tree, ids\n"
3044 "cElementTree.XMLID = XMLID\n"
3045
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003046 "try:\n"
3047 " register_namespace = ET.register_namespace\n"
3048 "except AttributeError:\n"
3049 " def register_namespace(prefix, uri):\n"
3050 " ET._namespace_map[uri] = prefix\n"
3051 "cElementTree.register_namespace = register_namespace\n"
3052
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003053 "cElementTree.dump = ET.dump\n"
3054 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3055 "cElementTree.iselement = ET.iselement\n"
3056 "cElementTree.QName = ET.QName\n"
3057 "cElementTree.tostring = ET.tostring\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003058 "cElementTree.fromstringlist = ET.fromstringlist\n"
3059 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003060 "cElementTree.VERSION = '" VERSION "'\n"
3061 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003062
3063 );
3064
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003065 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3066 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003067
3068 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3069
3070 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3071 if (elementtree_copyelement_obj) {
3072 /* reduce hack needed; enable reduce method */
3073 PyMethodDef* mp;
3074 for (mp = element_methods; mp->ml_name; mp++)
3075 if (mp->ml_meth == (PyCFunction) element_reduce) {
3076 mp->ml_name = "__reduce__";
3077 break;
3078 }
3079 } else
3080 PyErr_Clear();
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003081
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003083 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3084 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003085
3086#if defined(USE_PYEXPAT_CAPI)
3087 /* link against pyexpat, if possible */
Larry Hastings402b73f2010-03-25 00:54:54 +00003088 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003089 if (expat_capi) {
3090 /* check that it's usable */
3091 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3092 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3093 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3094 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3095 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3096 expat_capi = NULL;
3097 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003098#endif
3099
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003100 elementtree_parseerror_obj = PyErr_NewException(
3101 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3102 );
3103 Py_INCREF(elementtree_parseerror_obj);
3104 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003105}