blob: b01167b8a08e6257bb804d808265d7f269701bec [file] [log] [blame]
/*
 * ElementTree
 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
 *
 * elementtree accelerator
 *
 * History:
 * 1999-06-20 fl created (as part of sgmlop)
 * 2001-05-29 fl effdom edition
 * 2003-02-27 fl elementtree edition (alpha)
 * 2004-06-03 fl updates for elementtree 1.2
 * 2005-01-05 fl major optimization effort
 * 2005-01-11 fl first public release (cElementTree 0.8)
 * 2005-01-12 fl split element object into base and extras
 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
 * 2005-01-17 fl added treebuilder close method
 * 2005-01-17 fl fixed crash in getchildren
 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
 * 2005-01-28 fl added remove method (1.0.1)
 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
 * 2005-03-26 fl added Comment and PI support to XMLParser
 * 2005-03-27 fl event optimizations; complain about bogus events
 * 2005-08-08 fl fixed read error handling in parse
 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
 * 2005-12-16 fl added support for non-standard encodings
 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
 * 2006-03-12 fl merge in 2.5 ssize_t changes
 * 2007-08-25 fl call custom builder's close method from XMLParser
 * 2007-08-31 fl added iter, extend from ET 1.3
 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
 * 2007-09-03 fl fixed handling of negative insert indexes
 * 2007-09-04 fl added itertext from ET 1.3
 * 2007-09-06 fl added position attribute to ParseError exception
 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
 *
 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xicluna3e8c1892010-03-11 14:36:19 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
Fredrik Lundhdc075b92006-08-16 16:47:07 +000052#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000053
54/* -------------------------------------------------------------------- */
55/* configuration */
56
57/* Leave defined to include the expat-based XMLParser type */
58#define USE_EXPAT
59
Florent Xicluna3e8c1892010-03-11 14:36:19 +000060/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* #define USE_PYEXPAT_CAPI */
62
63/* An element can hold this many children without extra memory
64 allocations. */
65#define STATIC_CHILDREN 4
66
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
73 eight bytes. For the current version of cElementTree, this means
74 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
/* Allocation tracing hooks.  Change the condition below to 1 to keep a
   running byte count and print it on every ALLOC/RELEASE; as shipped,
   both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
89
/* compiler tweaks: LOCAL(type) declares a file-local helper; under MSVC
   it additionally requests inlining and the fastcall convention */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif

/* compatibility macros */

/* accessor macros, supplied here for versions below 2.6 */
#if (PY_VERSION_HEX < 0x02060000)
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Py_ssize_t and lenfunc, supplied here for versions below 2.5 */
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

/* fallbacks for versions below 2.4 */
#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check

#if !defined(Py_RETURN_NONE)
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000115
/* The 'join' flag is kept in the lowest bit of the text/tail pointers.
   Every use of text or tail as an object pointer must be wrapped in
   JOIN_OBJ.  See the comments in the ElementObject definition for more
   info.

   JOIN_GET(p)       -- value of the flag bit (0 or 1)
   JOIN_SET(p, flag) -- p with its flag bit cleared, then or-ed with flag
   JOIN_OBJ(p)       -- p with the flag bit cleared, as a PyObject* */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
123
124/* glue functions (see the init function for details) */
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000125static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000128static PyObject* elementtree_iter_obj;
129static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130static PyObject* elementpath_obj;
131
132/* helpers */
133
134LOCAL(PyObject*)
135deepcopy(PyObject* object, PyObject* memo)
136{
137 /* do a deep copy of the given object */
138
139 PyObject* args;
140 PyObject* result;
141
142 if (!elementtree_deepcopy_obj) {
143 PyErr_SetString(
144 PyExc_RuntimeError,
145 "deepcopy helper not found"
146 );
147 return NULL;
148 }
149
150 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000151 if (!args)
152 return NULL;
153
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
155 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156
157 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158
159 Py_DECREF(args);
160
161 return result;
162}
163
164LOCAL(PyObject*)
165list_join(PyObject* list)
166{
167 /* join list elements (destroying the list in the process) */
168
169 PyObject* joiner;
170 PyObject* function;
171 PyObject* args;
172 PyObject* result;
173
174 switch (PyList_GET_SIZE(list)) {
175 case 0:
176 Py_DECREF(list);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000177 return PyString_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000178 case 1:
179 result = PyList_GET_ITEM(list, 0);
180 Py_INCREF(result);
181 Py_DECREF(list);
182 return result;
183 }
184
185 /* two or more elements: slice out a suitable separator from the
186 first member, and use that to join the entire list */
187
188 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
189 if (!joiner)
190 return NULL;
191
192 function = PyObject_GetAttrString(joiner, "join");
193 if (!function) {
194 Py_DECREF(joiner);
195 return NULL;
196 }
197
198 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000199 if (!args)
200 return NULL;
201
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202 PyTuple_SET_ITEM(args, 0, list);
203
204 result = PyObject_CallObject(function, args);
205
206 Py_DECREF(args); /* also removes list */
207 Py_DECREF(function);
208 Py_DECREF(joiner);
209
210 return result;
211}
212
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000213/* -------------------------------------------------------------------- */
214/* the element type */
215
216typedef struct {
217
218 /* attributes (a dictionary object), or None if no attributes */
219 PyObject* attrib;
220
221 /* child elements */
222 int length; /* actual number of items */
223 int allocated; /* allocated items */
224
225 /* this either points to _children or to a malloced buffer */
226 PyObject* *children;
227
228 PyObject* _children[STATIC_CHILDREN];
229
230} ElementObjectExtra;
231
232typedef struct {
233 PyObject_HEAD
234
235 /* element tag (a string). */
236 PyObject* tag;
237
238 /* text before first child. note that this is a tagged pointer;
239 use JOIN_OBJ to get the object pointer. the join flag is used
240 to distinguish lists created by the tree builder from lists
241 assigned to the attribute by application code; the former
242 should be joined before being returned to the user, the latter
243 should be left intact. */
244 PyObject* text;
245
246 /* text after this element, in parent. note that this is a tagged
247 pointer; use JOIN_OBJ to get the object pointer. */
248 PyObject* tail;
249
250 ElementObjectExtra* extra;
251
252} ElementObject;
253
254staticforward PyTypeObject Element_Type;
255
Christian Heimese93237d2007-12-19 02:37:44 +0000256#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000257
258/* -------------------------------------------------------------------- */
259/* element constructor and destructor */
260
261LOCAL(int)
262element_new_extra(ElementObject* self, PyObject* attrib)
263{
264 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
265 if (!self->extra)
266 return -1;
267
268 if (!attrib)
269 attrib = Py_None;
270
271 Py_INCREF(attrib);
272 self->extra->attrib = attrib;
273
274 self->extra->length = 0;
275 self->extra->allocated = STATIC_CHILDREN;
276 self->extra->children = self->extra->_children;
277
278 return 0;
279}
280
281LOCAL(void)
282element_dealloc_extra(ElementObject* self)
283{
284 int i;
285
286 Py_DECREF(self->extra->attrib);
287
288 for (i = 0; i < self->extra->length; i++)
289 Py_DECREF(self->extra->children[i]);
290
291 if (self->extra->children != self->extra->_children)
292 PyObject_Free(self->extra->children);
293
294 PyObject_Free(self->extra);
295}
296
297LOCAL(PyObject*)
298element_new(PyObject* tag, PyObject* attrib)
299{
300 ElementObject* self;
301
302 self = PyObject_New(ElementObject, &Element_Type);
303 if (self == NULL)
304 return NULL;
305
306 /* use None for empty dictionaries */
307 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
308 attrib = Py_None;
309
310 self->extra = NULL;
311
312 if (attrib != Py_None) {
313
Neal Norwitzc6a989a2006-05-10 06:57:58 +0000314 if (element_new_extra(self, attrib) < 0) {
315 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000316 return NULL;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000317 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000318
319 self->extra->length = 0;
320 self->extra->allocated = STATIC_CHILDREN;
321 self->extra->children = self->extra->_children;
322
323 }
324
325 Py_INCREF(tag);
326 self->tag = tag;
327
328 Py_INCREF(Py_None);
329 self->text = Py_None;
330
331 Py_INCREF(Py_None);
332 self->tail = Py_None;
333
334 ALLOC(sizeof(ElementObject), "create element");
335
336 return (PyObject*) self;
337}
338
339LOCAL(int)
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200340element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000341{
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200342 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000343 PyObject* *children;
344
345 /* make sure self->children can hold the given number of extra
346 elements. set an exception and return -1 if allocation failed */
347
348 if (!self->extra)
349 element_new_extra(self, NULL);
350
351 size = self->extra->length + extra;
352
353 if (size > self->extra->allocated) {
354 /* use Python 2.4's list growth strategy */
355 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000356 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
357 * which needs at least 4 bytes.
358 * Although it's a false alarm always assume at least one child to
359 * be safe.
360 */
361 size = size ? size : 1;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200362 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
363 goto nomemory;
364 if (size > INT_MAX) {
365 PyErr_SetString(PyExc_OverflowError,
366 "too many children");
367 return -1;
368 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000369 if (self->extra->children != self->extra->_children) {
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000370 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
371 * "children", which needs at least 4 bytes. Although it's a
372 * false alarm always assume at least one child to be safe.
373 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000374 children = PyObject_Realloc(self->extra->children,
375 size * sizeof(PyObject*));
376 if (!children)
377 goto nomemory;
378 } else {
379 children = PyObject_Malloc(size * sizeof(PyObject*));
380 if (!children)
381 goto nomemory;
382 /* copy existing children from static area to malloc buffer */
383 memcpy(children, self->extra->children,
384 self->extra->length * sizeof(PyObject*));
385 }
386 self->extra->children = children;
387 self->extra->allocated = size;
388 }
389
390 return 0;
391
392 nomemory:
393 PyErr_NoMemory();
394 return -1;
395}
396
397LOCAL(int)
398element_add_subelement(ElementObject* self, PyObject* element)
399{
400 /* add a child element to a parent */
401
402 if (element_resize(self, 1) < 0)
403 return -1;
404
405 Py_INCREF(element);
406 self->extra->children[self->extra->length] = element;
407
408 self->extra->length++;
409
410 return 0;
411}
412
413LOCAL(PyObject*)
414element_get_attrib(ElementObject* self)
415{
416 /* return borrowed reference to attrib dictionary */
417 /* note: this function assumes that the extra section exists */
418
419 PyObject* res = self->extra->attrib;
420
421 if (res == Py_None) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000422 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000423 /* create missing dictionary */
424 res = PyDict_New();
425 if (!res)
426 return NULL;
427 self->extra->attrib = res;
428 }
429
430 return res;
431}
432
433LOCAL(PyObject*)
434element_get_text(ElementObject* self)
435{
436 /* return borrowed reference to text attribute */
437
438 PyObject* res = self->text;
439
440 if (JOIN_GET(res)) {
441 res = JOIN_OBJ(res);
442 if (PyList_CheckExact(res)) {
443 res = list_join(res);
444 if (!res)
445 return NULL;
446 self->text = res;
447 }
448 }
449
450 return res;
451}
452
453LOCAL(PyObject*)
454element_get_tail(ElementObject* self)
455{
456 /* return borrowed reference to text attribute */
457
458 PyObject* res = self->tail;
459
460 if (JOIN_GET(res)) {
461 res = JOIN_OBJ(res);
462 if (PyList_CheckExact(res)) {
463 res = list_join(res);
464 if (!res)
465 return NULL;
466 self->tail = res;
467 }
468 }
469
470 return res;
471}
472
473static PyObject*
474element(PyObject* self, PyObject* args, PyObject* kw)
475{
476 PyObject* elem;
477
478 PyObject* tag;
479 PyObject* attrib = NULL;
480 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
481 &PyDict_Type, &attrib))
482 return NULL;
483
484 if (attrib || kw) {
485 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
486 if (!attrib)
487 return NULL;
488 if (kw)
489 PyDict_Update(attrib, kw);
490 } else {
491 Py_INCREF(Py_None);
492 attrib = Py_None;
493 }
494
495 elem = element_new(tag, attrib);
496
497 Py_DECREF(attrib);
498
499 return elem;
500}
501
502static PyObject*
503subelement(PyObject* self, PyObject* args, PyObject* kw)
504{
505 PyObject* elem;
506
507 ElementObject* parent;
508 PyObject* tag;
509 PyObject* attrib = NULL;
510 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
511 &Element_Type, &parent, &tag,
512 &PyDict_Type, &attrib))
513 return NULL;
514
515 if (attrib || kw) {
516 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
517 if (!attrib)
518 return NULL;
519 if (kw)
520 PyDict_Update(attrib, kw);
521 } else {
522 Py_INCREF(Py_None);
523 attrib = Py_None;
524 }
525
526 elem = element_new(tag, attrib);
527
528 Py_DECREF(attrib);
529
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000530 if (element_add_subelement(parent, elem) < 0) {
531 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000532 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000533 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000534
535 return elem;
536}
537
538static void
539element_dealloc(ElementObject* self)
540{
541 if (self->extra)
542 element_dealloc_extra(self);
543
544 /* discard attributes */
545 Py_DECREF(self->tag);
546 Py_DECREF(JOIN_OBJ(self->text));
547 Py_DECREF(JOIN_OBJ(self->tail));
548
549 RELEASE(sizeof(ElementObject), "destroy element");
550
551 PyObject_Del(self);
552}
553
554/* -------------------------------------------------------------------- */
555/* methods (in alphabetical order) */
556
557static PyObject*
558element_append(ElementObject* self, PyObject* args)
559{
560 PyObject* element;
561 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
562 return NULL;
563
564 if (element_add_subelement(self, element) < 0)
565 return NULL;
566
567 Py_RETURN_NONE;
568}
569
570static PyObject*
571element_clear(ElementObject* self, PyObject* args)
572{
573 if (!PyArg_ParseTuple(args, ":clear"))
574 return NULL;
575
576 if (self->extra) {
577 element_dealloc_extra(self);
578 self->extra = NULL;
579 }
580
581 Py_INCREF(Py_None);
582 Py_DECREF(JOIN_OBJ(self->text));
583 self->text = Py_None;
584
585 Py_INCREF(Py_None);
586 Py_DECREF(JOIN_OBJ(self->tail));
587 self->tail = Py_None;
588
589 Py_RETURN_NONE;
590}
591
592static PyObject*
593element_copy(ElementObject* self, PyObject* args)
594{
595 int i;
596 ElementObject* element;
597
598 if (!PyArg_ParseTuple(args, ":__copy__"))
599 return NULL;
600
601 element = (ElementObject*) element_new(
602 self->tag, (self->extra) ? self->extra->attrib : Py_None
603 );
604 if (!element)
605 return NULL;
606
607 Py_DECREF(JOIN_OBJ(element->text));
608 element->text = self->text;
609 Py_INCREF(JOIN_OBJ(element->text));
610
611 Py_DECREF(JOIN_OBJ(element->tail));
612 element->tail = self->tail;
613 Py_INCREF(JOIN_OBJ(element->tail));
614
615 if (self->extra) {
616
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000617 if (element_resize(element, self->extra->length) < 0) {
618 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000619 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000620 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000621
622 for (i = 0; i < self->extra->length; i++) {
623 Py_INCREF(self->extra->children[i]);
624 element->extra->children[i] = self->extra->children[i];
625 }
626
627 element->extra->length = self->extra->length;
628
629 }
630
631 return (PyObject*) element;
632}
633
634static PyObject*
635element_deepcopy(ElementObject* self, PyObject* args)
636{
637 int i;
638 ElementObject* element;
639 PyObject* tag;
640 PyObject* attrib;
641 PyObject* text;
642 PyObject* tail;
643 PyObject* id;
644
645 PyObject* memo;
646 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
647 return NULL;
648
649 tag = deepcopy(self->tag, memo);
650 if (!tag)
651 return NULL;
652
653 if (self->extra) {
654 attrib = deepcopy(self->extra->attrib, memo);
655 if (!attrib) {
656 Py_DECREF(tag);
657 return NULL;
658 }
659 } else {
660 Py_INCREF(Py_None);
661 attrib = Py_None;
662 }
663
664 element = (ElementObject*) element_new(tag, attrib);
665
666 Py_DECREF(tag);
667 Py_DECREF(attrib);
668
669 if (!element)
670 return NULL;
671
672 text = deepcopy(JOIN_OBJ(self->text), memo);
673 if (!text)
674 goto error;
675 Py_DECREF(element->text);
676 element->text = JOIN_SET(text, JOIN_GET(self->text));
677
678 tail = deepcopy(JOIN_OBJ(self->tail), memo);
679 if (!tail)
680 goto error;
681 Py_DECREF(element->tail);
682 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
683
684 if (self->extra) {
685
686 if (element_resize(element, self->extra->length) < 0)
687 goto error;
688
689 for (i = 0; i < self->extra->length; i++) {
690 PyObject* child = deepcopy(self->extra->children[i], memo);
691 if (!child) {
692 element->extra->length = i;
693 goto error;
694 }
695 element->extra->children[i] = child;
696 }
697
698 element->extra->length = self->extra->length;
699
700 }
701
702 /* add object to memo dictionary (so deepcopy won't visit it again) */
703 id = PyInt_FromLong((Py_uintptr_t) self);
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000704 if (!id)
705 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000706
707 i = PyDict_SetItem(memo, id, (PyObject*) element);
708
709 Py_DECREF(id);
710
711 if (i < 0)
712 goto error;
713
714 return (PyObject*) element;
715
716 error:
717 Py_DECREF(element);
718 return NULL;
719}
720
721LOCAL(int)
722checkpath(PyObject* tag)
723{
Neal Norwitzc7074382006-06-12 02:06:17 +0000724 Py_ssize_t i;
725 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000726
727 /* check if a tag contains an xpath character */
728
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000729#define PATHCHAR(ch) \
730 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000731
732#if defined(Py_USING_UNICODE)
733 if (PyUnicode_Check(tag)) {
734 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
735 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
736 if (p[i] == '{')
737 check = 0;
738 else if (p[i] == '}')
739 check = 1;
740 else if (check && PATHCHAR(p[i]))
741 return 1;
742 }
743 return 0;
744 }
745#endif
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000746 if (PyString_Check(tag)) {
747 char *p = PyString_AS_STRING(tag);
748 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000749 if (p[i] == '{')
750 check = 0;
751 else if (p[i] == '}')
752 check = 1;
753 else if (check && PATHCHAR(p[i]))
754 return 1;
755 }
756 return 0;
757 }
758
759 return 1; /* unknown type; might be path expression */
760}
761
762static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000763element_extend(ElementObject* self, PyObject* args)
764{
765 PyObject* seq;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300766 Py_ssize_t i;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000767
768 PyObject* seq_in;
769 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
770 return NULL;
771
772 seq = PySequence_Fast(seq_in, "");
773 if (!seq) {
774 PyErr_Format(
775 PyExc_TypeError,
776 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
777 );
778 return NULL;
779 }
780
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300781 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000782 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
783 if (element_add_subelement(self, element) < 0) {
784 Py_DECREF(seq);
785 return NULL;
786 }
787 }
788
789 Py_DECREF(seq);
790
791 Py_RETURN_NONE;
792}
793
794static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000795element_find(ElementObject* self, PyObject* args)
796{
797 int i;
798
799 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000800 PyObject* namespaces = Py_None;
801 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000802 return NULL;
803
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000804 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000806 elementpath_obj, "find", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807 );
808
809 if (!self->extra)
810 Py_RETURN_NONE;
811
812 for (i = 0; i < self->extra->length; i++) {
813 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300814 int rc;
815 if (!Element_CheckExact(item))
816 continue;
817 Py_INCREF(item);
818 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
819 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000820 return item;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300821 Py_DECREF(item);
822 if (rc < 0 && PyErr_Occurred())
823 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000824 }
825
826 Py_RETURN_NONE;
827}
828
829static PyObject*
830element_findtext(ElementObject* self, PyObject* args)
831{
832 int i;
833
834 PyObject* tag;
835 PyObject* default_value = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000836 PyObject* namespaces = Py_None;
837 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000838 return NULL;
839
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000840 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000841 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000842 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000843 );
844
845 if (!self->extra) {
846 Py_INCREF(default_value);
847 return default_value;
848 }
849
850 for (i = 0; i < self->extra->length; i++) {
851 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300852 int rc;
853 if (!Element_CheckExact(item))
854 continue;
855 Py_INCREF(item);
856 rc = PyObject_Compare(item->tag, tag);
857 if (rc == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000858 PyObject* text = element_get_text(item);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300859 if (text == Py_None) {
860 Py_DECREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000861 return PyString_FromString("");
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300862 }
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000863 Py_XINCREF(text);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300864 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000865 return text;
866 }
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300867 Py_DECREF(item);
868 if (rc < 0 && PyErr_Occurred())
869 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000870 }
871
872 Py_INCREF(default_value);
873 return default_value;
874}
875
876static PyObject*
877element_findall(ElementObject* self, PyObject* args)
878{
879 int i;
880 PyObject* out;
881
882 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000883 PyObject* namespaces = Py_None;
884 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000885 return NULL;
886
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000887 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000888 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000889 elementpath_obj, "findall", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000890 );
891
892 out = PyList_New(0);
893 if (!out)
894 return NULL;
895
896 if (!self->extra)
897 return out;
898
899 for (i = 0; i < self->extra->length; i++) {
900 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300901 int rc;
902 if (!Element_CheckExact(item))
903 continue;
904 Py_INCREF(item);
905 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
906 if (rc == 0)
907 rc = PyList_Append(out, item);
908 Py_DECREF(item);
909 if (rc < 0 && PyErr_Occurred()) {
910 Py_DECREF(out);
911 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000912 }
913 }
914
915 return out;
916}
917
918static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000919element_iterfind(ElementObject* self, PyObject* args)
920{
921 PyObject* tag;
922 PyObject* namespaces = Py_None;
923 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
924 return NULL;
925
926 return PyObject_CallMethod(
927 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
928 );
929}
930
931static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000932element_get(ElementObject* self, PyObject* args)
933{
934 PyObject* value;
935
936 PyObject* key;
937 PyObject* default_value = Py_None;
938 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
939 return NULL;
940
941 if (!self->extra || self->extra->attrib == Py_None)
942 value = default_value;
943 else {
944 value = PyDict_GetItem(self->extra->attrib, key);
945 if (!value)
946 value = default_value;
947 }
948
949 Py_INCREF(value);
950 return value;
951}
952
953static PyObject*
954element_getchildren(ElementObject* self, PyObject* args)
955{
956 int i;
957 PyObject* list;
958
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000959 /* FIXME: report as deprecated? */
960
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000961 if (!PyArg_ParseTuple(args, ":getchildren"))
962 return NULL;
963
964 if (!self->extra)
965 return PyList_New(0);
966
967 list = PyList_New(self->extra->length);
968 if (!list)
969 return NULL;
970
971 for (i = 0; i < self->extra->length; i++) {
972 PyObject* item = self->extra->children[i];
973 Py_INCREF(item);
974 PyList_SET_ITEM(list, i, item);
975 }
976
977 return list;
978}
979
980static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000981element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000982{
983 PyObject* result;
984
985 PyObject* tag = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000986 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000987 return NULL;
988
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000989 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000990 PyErr_SetString(
991 PyExc_RuntimeError,
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000992 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000993 );
994 return NULL;
995 }
996
997 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000998 if (!args)
999 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001000
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001001 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1002 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1003
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001004 result = PyObject_CallObject(elementtree_iter_obj, args);
1005
1006 Py_DECREF(args);
1007
1008 return result;
1009}
1010
1011
1012static PyObject*
1013element_itertext(ElementObject* self, PyObject* args)
1014{
1015 PyObject* result;
1016
1017 if (!PyArg_ParseTuple(args, ":itertext"))
1018 return NULL;
1019
1020 if (!elementtree_itertext_obj) {
1021 PyErr_SetString(
1022 PyExc_RuntimeError,
1023 "itertext helper not found"
1024 );
1025 return NULL;
1026 }
1027
1028 args = PyTuple_New(1);
1029 if (!args)
1030 return NULL;
1031
1032 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1033
1034 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001035
1036 Py_DECREF(args);
1037
1038 return result;
1039}
1040
1041static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001042element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001043{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001044 ElementObject* self = (ElementObject*) self_;
1045
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001046 if (!self->extra || index < 0 || index >= self->extra->length) {
1047 PyErr_SetString(
1048 PyExc_IndexError,
1049 "child index out of range"
1050 );
1051 return NULL;
1052 }
1053
1054 Py_INCREF(self->extra->children[index]);
1055 return self->extra->children[index];
1056}
1057
1058static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001059element_insert(ElementObject* self, PyObject* args)
1060{
1061 int i;
1062
1063 int index;
1064 PyObject* element;
1065 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1066 &Element_Type, &element))
1067 return NULL;
1068
1069 if (!self->extra)
1070 element_new_extra(self, NULL);
1071
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001072 if (index < 0) {
1073 index += self->extra->length;
1074 if (index < 0)
1075 index = 0;
1076 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001077 if (index > self->extra->length)
1078 index = self->extra->length;
1079
1080 if (element_resize(self, 1) < 0)
1081 return NULL;
1082
1083 for (i = self->extra->length; i > index; i--)
1084 self->extra->children[i] = self->extra->children[i-1];
1085
1086 Py_INCREF(element);
1087 self->extra->children[index] = element;
1088
1089 self->extra->length++;
1090
1091 Py_RETURN_NONE;
1092}
1093
1094static PyObject*
1095element_items(ElementObject* self, PyObject* args)
1096{
1097 if (!PyArg_ParseTuple(args, ":items"))
1098 return NULL;
1099
1100 if (!self->extra || self->extra->attrib == Py_None)
1101 return PyList_New(0);
1102
1103 return PyDict_Items(self->extra->attrib);
1104}
1105
1106static PyObject*
1107element_keys(ElementObject* self, PyObject* args)
1108{
1109 if (!PyArg_ParseTuple(args, ":keys"))
1110 return NULL;
1111
1112 if (!self->extra || self->extra->attrib == Py_None)
1113 return PyList_New(0);
1114
1115 return PyDict_Keys(self->extra->attrib);
1116}
1117
Martin v. Löwis18e16552006-02-15 17:27:45 +00001118static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001119element_length(ElementObject* self)
1120{
1121 if (!self->extra)
1122 return 0;
1123
1124 return self->extra->length;
1125}
1126
1127static PyObject*
1128element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1129{
1130 PyObject* elem;
1131
1132 PyObject* tag;
1133 PyObject* attrib;
1134 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1135 return NULL;
1136
1137 attrib = PyDict_Copy(attrib);
1138 if (!attrib)
1139 return NULL;
1140
1141 elem = element_new(tag, attrib);
1142
1143 Py_DECREF(attrib);
1144
1145 return elem;
1146}
1147
1148static PyObject*
1149element_reduce(ElementObject* self, PyObject* args)
1150{
1151 if (!PyArg_ParseTuple(args, ":__reduce__"))
1152 return NULL;
1153
1154 /* Hack alert: This method is used to work around a __copy__
1155 problem on certain 2.3 and 2.4 versions. To save time and
1156 simplify the code, we create the copy in here, and use a dummy
1157 copyelement helper to trick the copy module into doing the
1158 right thing. */
1159
1160 if (!elementtree_copyelement_obj) {
1161 PyErr_SetString(
1162 PyExc_RuntimeError,
1163 "copyelement helper not found"
1164 );
1165 return NULL;
1166 }
1167
1168 return Py_BuildValue(
1169 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1170 );
1171}
1172
1173static PyObject*
1174element_remove(ElementObject* self, PyObject* args)
1175{
1176 int i;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001177 int rc;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178 PyObject* element;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001179 PyObject* found;
1180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001181 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1182 return NULL;
1183
1184 if (!self->extra) {
1185 /* element has no children, so raise exception */
1186 PyErr_SetString(
1187 PyExc_ValueError,
1188 "list.remove(x): x not in list"
1189 );
1190 return NULL;
1191 }
1192
1193 for (i = 0; i < self->extra->length; i++) {
1194 if (self->extra->children[i] == element)
1195 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001196 rc = PyObject_Compare(self->extra->children[i], element);
1197 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001198 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001199 if (rc < 0 && PyErr_Occurred())
1200 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001201 }
1202
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001203 if (i >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001204 /* element is not in children, so raise exception */
1205 PyErr_SetString(
1206 PyExc_ValueError,
1207 "list.remove(x): x not in list"
1208 );
1209 return NULL;
1210 }
1211
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001212 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001213
1214 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001215 for (; i < self->extra->length; i++)
1216 self->extra->children[i] = self->extra->children[i+1];
1217
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001218 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219 Py_RETURN_NONE;
1220}
1221
1222static PyObject*
1223element_repr(ElementObject* self)
1224{
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001225 int status;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001227 if (self->tag == NULL)
1228 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001229
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001230 status = Py_ReprEnter((PyObject *)self);
1231 if (status == 0) {
1232 PyObject *repr, *tag;
1233 tag = PyObject_Repr(self->tag);
1234 if (!tag)
1235 return NULL;
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001236
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001237 repr = PyString_FromFormat("<Element %s at %p>",
1238 PyString_AS_STRING(tag), self);
Benjamin Petersond7324bc2016-12-03 11:30:04 -08001239 Py_ReprLeave((PyObject *)self);
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001240 Py_DECREF(tag);
1241 return repr;
1242 }
1243 if (status > 0)
1244 PyErr_Format(PyExc_RuntimeError,
1245 "reentrant call inside %s.__repr__",
1246 Py_TYPE(self)->tp_name);
1247 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001248}
1249
1250static PyObject*
1251element_set(ElementObject* self, PyObject* args)
1252{
1253 PyObject* attrib;
1254
1255 PyObject* key;
1256 PyObject* value;
1257 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1258 return NULL;
1259
1260 if (!self->extra)
1261 element_new_extra(self, NULL);
1262
1263 attrib = element_get_attrib(self);
1264 if (!attrib)
1265 return NULL;
1266
1267 if (PyDict_SetItem(attrib, key, value) < 0)
1268 return NULL;
1269
1270 Py_RETURN_NONE;
1271}
1272
1273static int
Serhiy Storchakab5b76c32015-11-26 11:21:47 +02001274element_setitem(PyObject* self_, Py_ssize_t index_, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001275{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001276 ElementObject* self = (ElementObject*) self_;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001277 int i, index;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001278 PyObject* old;
1279
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001280 if (!self->extra || index_ < 0 || index_ >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001281 PyErr_SetString(
1282 PyExc_IndexError,
1283 "child assignment index out of range");
1284 return -1;
1285 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001286 index = (int)index_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001287
1288 old = self->extra->children[index];
1289
1290 if (item) {
1291 Py_INCREF(item);
1292 self->extra->children[index] = item;
1293 } else {
1294 self->extra->length--;
1295 for (i = index; i < self->extra->length; i++)
1296 self->extra->children[i] = self->extra->children[i+1];
1297 }
1298
1299 Py_DECREF(old);
1300
1301 return 0;
1302}
1303
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001304static PyObject*
1305element_subscr(PyObject* self_, PyObject* item)
1306{
1307 ElementObject* self = (ElementObject*) self_;
1308
1309#if (PY_VERSION_HEX < 0x02050000)
1310 if (PyInt_Check(item) || PyLong_Check(item)) {
1311 long i = PyInt_AsLong(item);
1312#else
1313 if (PyIndex_Check(item)) {
1314 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1315#endif
1316
1317 if (i == -1 && PyErr_Occurred()) {
1318 return NULL;
1319 }
1320 if (i < 0 && self->extra)
1321 i += self->extra->length;
1322 return element_getitem(self_, i);
1323 }
1324 else if (PySlice_Check(item)) {
1325 Py_ssize_t start, stop, step, slicelen, cur, i;
1326 PyObject* list;
1327
1328 if (!self->extra)
1329 return PyList_New(0);
1330
1331 if (PySlice_GetIndicesEx((PySliceObject *)item,
1332 self->extra->length,
1333 &start, &stop, &step, &slicelen) < 0) {
1334 return NULL;
1335 }
1336
1337 if (slicelen <= 0)
1338 return PyList_New(0);
1339 else {
1340 list = PyList_New(slicelen);
1341 if (!list)
1342 return NULL;
1343
1344 for (cur = start, i = 0; i < slicelen;
1345 cur += step, i++) {
1346 PyObject* item = self->extra->children[cur];
1347 Py_INCREF(item);
1348 PyList_SET_ITEM(list, i, item);
1349 }
1350
1351 return list;
1352 }
1353 }
1354 else {
1355 PyErr_SetString(PyExc_TypeError,
1356 "element indices must be integers");
1357 return NULL;
1358 }
1359}
1360
1361static int
1362element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1363{
1364 ElementObject* self = (ElementObject*) self_;
1365
1366#if (PY_VERSION_HEX < 0x02050000)
1367 if (PyInt_Check(item) || PyLong_Check(item)) {
1368 long i = PyInt_AsLong(item);
1369#else
1370 if (PyIndex_Check(item)) {
1371 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1372#endif
1373
1374 if (i == -1 && PyErr_Occurred()) {
1375 return -1;
1376 }
1377 if (i < 0 && self->extra)
1378 i += self->extra->length;
1379 return element_setitem(self_, i, value);
1380 }
1381 else if (PySlice_Check(item)) {
1382 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1383
1384 PyObject* recycle = NULL;
1385 PyObject* seq = NULL;
1386
1387 if (!self->extra)
1388 element_new_extra(self, NULL);
1389
1390 if (PySlice_GetIndicesEx((PySliceObject *)item,
1391 self->extra->length,
1392 &start, &stop, &step, &slicelen) < 0) {
1393 return -1;
1394 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001395 assert(slicelen <= self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001396
1397 if (value == NULL)
1398 newlen = 0;
1399 else {
1400 seq = PySequence_Fast(value, "");
1401 if (!seq) {
1402 PyErr_Format(
1403 PyExc_TypeError,
1404 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1405 );
1406 return -1;
1407 }
1408 newlen = PySequence_Size(seq);
1409 }
1410
1411 if (step != 1 && newlen != slicelen)
1412 {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001413 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001414 PyErr_Format(PyExc_ValueError,
1415#if (PY_VERSION_HEX < 0x02050000)
1416 "attempt to assign sequence of size %d "
1417 "to extended slice of size %d",
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001418 (int)newlen, (int)slicelen
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001419#else
1420 "attempt to assign sequence of size %zd "
1421 "to extended slice of size %zd",
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001422 newlen, slicelen
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001423#endif
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001424 );
1425 return -1;
1426 }
1427
1428
1429 /* Resize before creating the recycle bin, to prevent refleaks. */
1430 if (newlen > slicelen) {
1431 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001432 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001433 return -1;
1434 }
1435 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001436 assert(newlen - slicelen <= INT_MAX - self->extra->length);
1437 assert(newlen - slicelen >= -self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001438
1439 if (slicelen > 0) {
1440 /* to avoid recursive calls to this method (via decref), move
1441 old items to the recycle bin here, and get rid of them when
1442 we're done modifying the element */
1443 recycle = PyList_New(slicelen);
1444 if (!recycle) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001445 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001446 return -1;
1447 }
1448 for (cur = start, i = 0; i < slicelen;
1449 cur += step, i++)
1450 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1451 }
1452
1453 if (newlen < slicelen) {
1454 /* delete slice */
1455 for (i = stop; i < self->extra->length; i++)
1456 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1457 } else if (newlen > slicelen) {
1458 /* insert slice */
1459 for (i = self->extra->length-1; i >= stop; i--)
1460 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1461 }
1462
1463 /* replace the slice */
1464 for (cur = start, i = 0; i < newlen;
1465 cur += step, i++) {
1466 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1467 Py_INCREF(element);
1468 self->extra->children[cur] = element;
1469 }
1470
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001471 self->extra->length += (int)(newlen - slicelen);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001472
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001473 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001474
1475 /* discard the recycle bin, and everything in it */
1476 Py_XDECREF(recycle);
1477
1478 return 0;
1479 }
1480 else {
1481 PyErr_SetString(PyExc_TypeError,
1482 "element indices must be integers");
1483 return -1;
1484 }
1485}
1486
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001487static PyMethodDef element_methods[] = {
1488
1489 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1490
1491 {"get", (PyCFunction) element_get, METH_VARARGS},
1492 {"set", (PyCFunction) element_set, METH_VARARGS},
1493
1494 {"find", (PyCFunction) element_find, METH_VARARGS},
1495 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1496 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1497
1498 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001499 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001500 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1501 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1502
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001503 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1504 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1505 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1506
1507 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001508 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1509
1510 {"items", (PyCFunction) element_items, METH_VARARGS},
1511 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1512
1513 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1514
1515 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1516 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1517
1518 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1519 C objects correctly, so we have to fake it using a __reduce__-
1520 based hack (see the element_reduce implementation above for
1521 details). */
1522
1523 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1524 using a runtime test to figure out if we need to fake things
1525 or now (see the init code below). The following entry is
1526 enabled only if the hack is needed. */
1527
1528 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1529
1530 {NULL, NULL}
1531};
1532
1533static PyObject*
1534element_getattr(ElementObject* self, char* name)
1535{
1536 PyObject* res;
1537
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001538 /* handle common attributes first */
1539 if (strcmp(name, "tag") == 0) {
1540 res = self->tag;
1541 Py_INCREF(res);
1542 return res;
1543 } else if (strcmp(name, "text") == 0) {
1544 res = element_get_text(self);
1545 Py_INCREF(res);
1546 return res;
1547 }
1548
1549 /* methods */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001550 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1551 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001552 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001553
1554 PyErr_Clear();
1555
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001556 /* less common attributes */
1557 if (strcmp(name, "tail") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001558 res = element_get_tail(self);
1559 } else if (strcmp(name, "attrib") == 0) {
1560 if (!self->extra)
1561 element_new_extra(self, NULL);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001562 res = element_get_attrib(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001563 } else {
1564 PyErr_SetString(PyExc_AttributeError, name);
1565 return NULL;
1566 }
1567
1568 if (!res)
1569 return NULL;
1570
1571 Py_INCREF(res);
1572 return res;
1573}
1574
1575static int
1576element_setattr(ElementObject* self, const char* name, PyObject* value)
1577{
1578 if (value == NULL) {
1579 PyErr_SetString(
1580 PyExc_AttributeError,
1581 "can't delete element attributes"
1582 );
1583 return -1;
1584 }
1585
1586 if (strcmp(name, "tag") == 0) {
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001587 Py_INCREF(value);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001588 Py_SETREF(self->tag, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001589 } else if (strcmp(name, "text") == 0) {
1590 Py_DECREF(JOIN_OBJ(self->text));
1591 self->text = value;
1592 Py_INCREF(self->text);
1593 } else if (strcmp(name, "tail") == 0) {
1594 Py_DECREF(JOIN_OBJ(self->tail));
1595 self->tail = value;
1596 Py_INCREF(self->tail);
1597 } else if (strcmp(name, "attrib") == 0) {
1598 if (!self->extra)
1599 element_new_extra(self, NULL);
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001600 Py_INCREF(value);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001601 Py_SETREF(self->extra->attrib, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001602 } else {
1603 PyErr_SetString(PyExc_AttributeError, name);
1604 return -1;
1605 }
1606
1607 return 0;
1608}
1609
1610static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001611 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001612 0, /* sq_concat */
1613 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001614 element_getitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001615 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001616 element_setitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001617 0,
1618};
1619
1620static PyMappingMethods element_as_mapping = {
1621 (lenfunc) element_length,
1622 (binaryfunc) element_subscr,
1623 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001624};
1625
1626statichere PyTypeObject Element_Type = {
1627 PyObject_HEAD_INIT(NULL)
1628 0, "Element", sizeof(ElementObject), 0,
1629 /* methods */
1630 (destructor)element_dealloc, /* tp_dealloc */
1631 0, /* tp_print */
1632 (getattrfunc)element_getattr, /* tp_getattr */
1633 (setattrfunc)element_setattr, /* tp_setattr */
1634 0, /* tp_compare */
1635 (reprfunc)element_repr, /* tp_repr */
1636 0, /* tp_as_number */
1637 &element_as_sequence, /* tp_as_sequence */
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001638 &element_as_mapping, /* tp_as_mapping */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001639};
1640
1641/* ==================================================================== */
1642/* the tree builder type */
1643
1644typedef struct {
1645 PyObject_HEAD
1646
1647 PyObject* root; /* root node (first created node) */
1648
1649 ElementObject* this; /* current node */
1650 ElementObject* last; /* most recently created node */
1651
1652 PyObject* data; /* data collector (string or list), or NULL */
1653
1654 PyObject* stack; /* element stack */
Neal Norwitzc7074382006-06-12 02:06:17 +00001655 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001656
1657 /* element tracing */
1658 PyObject* events; /* list of events, or NULL if not collecting */
1659 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1660 PyObject* end_event_obj;
1661 PyObject* start_ns_event_obj;
1662 PyObject* end_ns_event_obj;
1663
1664} TreeBuilderObject;
1665
1666staticforward PyTypeObject TreeBuilder_Type;
1667
Christian Heimese93237d2007-12-19 02:37:44 +00001668#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001669
1670/* -------------------------------------------------------------------- */
1671/* constructor and destructor */
1672
1673LOCAL(PyObject*)
1674treebuilder_new(void)
1675{
1676 TreeBuilderObject* self;
1677
1678 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1679 if (self == NULL)
1680 return NULL;
1681
1682 self->root = NULL;
1683
1684 Py_INCREF(Py_None);
1685 self->this = (ElementObject*) Py_None;
1686
1687 Py_INCREF(Py_None);
1688 self->last = (ElementObject*) Py_None;
1689
1690 self->data = NULL;
1691
1692 self->stack = PyList_New(20);
1693 self->index = 0;
1694
1695 self->events = NULL;
1696 self->start_event_obj = self->end_event_obj = NULL;
1697 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1698
1699 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1700
1701 return (PyObject*) self;
1702}
1703
1704static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00001705treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001706{
1707 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1708 return NULL;
1709
1710 return treebuilder_new();
1711}
1712
1713static void
1714treebuilder_dealloc(TreeBuilderObject* self)
1715{
1716 Py_XDECREF(self->end_ns_event_obj);
1717 Py_XDECREF(self->start_ns_event_obj);
1718 Py_XDECREF(self->end_event_obj);
1719 Py_XDECREF(self->start_event_obj);
1720 Py_XDECREF(self->events);
1721 Py_DECREF(self->stack);
1722 Py_XDECREF(self->data);
1723 Py_DECREF(self->last);
1724 Py_DECREF(self->this);
1725 Py_XDECREF(self->root);
1726
1727 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1728
1729 PyObject_Del(self);
1730}
1731
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001732LOCAL(int)
1733treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
1734 PyObject *node)
1735{
1736 if (action != NULL) {
1737 PyObject *res = PyTuple_Pack(2, action, node);
1738 if (res == NULL)
1739 return -1;
1740 if (PyList_Append(self->events, res) < 0) {
1741 Py_DECREF(res);
1742 return -1;
1743 }
1744 Py_DECREF(res);
1745 }
1746 return 0;
1747}
1748
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001749/* -------------------------------------------------------------------- */
1750/* handlers */
1751
1752LOCAL(PyObject*)
1753treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1754 PyObject* standalone)
1755{
1756 Py_RETURN_NONE;
1757}
1758
1759LOCAL(PyObject*)
1760treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1761 PyObject* attrib)
1762{
1763 PyObject* node;
1764 PyObject* this;
1765
1766 if (self->data) {
1767 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001768 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001769 self->last->text = JOIN_SET(
1770 self->data, PyList_CheckExact(self->data)
1771 );
1772 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001773 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001774 self->last->tail = JOIN_SET(
1775 self->data, PyList_CheckExact(self->data)
1776 );
1777 }
1778 self->data = NULL;
1779 }
1780
1781 node = element_new(tag, attrib);
1782 if (!node)
1783 return NULL;
1784
1785 this = (PyObject*) self->this;
1786
1787 if (this != Py_None) {
1788 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001789 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001790 } else {
1791 if (self->root) {
1792 PyErr_SetString(
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001793 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001794 "multiple elements on top level"
1795 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001796 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001797 }
1798 Py_INCREF(node);
1799 self->root = node;
1800 }
1801
1802 if (self->index < PyList_GET_SIZE(self->stack)) {
1803 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001804 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001805 Py_INCREF(this);
1806 } else {
1807 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001808 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001809 }
1810 self->index++;
1811
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001812 Py_INCREF(node);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001813 Py_SETREF(self->this, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001814
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001815 Py_INCREF(node);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001816 Py_SETREF(self->last, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001817
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001818 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
1819 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001820
1821 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001822
1823 error:
1824 Py_DECREF(node);
1825 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001826}
1827
1828LOCAL(PyObject*)
1829treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1830{
1831 if (!self->data) {
Fredrik Lundhdc075b92006-08-16 16:47:07 +00001832 if (self->last == (ElementObject*) Py_None) {
1833 /* ignore calls to data before the first call to start */
1834 Py_RETURN_NONE;
1835 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001836 /* store the first item as is */
1837 Py_INCREF(data); self->data = data;
1838 } else {
1839 /* more than one item; use a list to collect items */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001840 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1841 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001842 /* expat often generates single character data sections; handle
1843 the most common case by resizing the existing string... */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001844 Py_ssize_t size = PyString_GET_SIZE(self->data);
1845 if (_PyString_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001846 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001847 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001848 } else if (PyList_CheckExact(self->data)) {
1849 if (PyList_Append(self->data, data) < 0)
1850 return NULL;
1851 } else {
1852 PyObject* list = PyList_New(2);
1853 if (!list)
1854 return NULL;
1855 PyList_SET_ITEM(list, 0, self->data);
1856 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1857 self->data = list;
1858 }
1859 }
1860
1861 Py_RETURN_NONE;
1862}
1863
1864LOCAL(PyObject*)
1865treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1866{
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001867 ElementObject *item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001868
1869 if (self->data) {
1870 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001871 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001872 self->last->text = JOIN_SET(
1873 self->data, PyList_CheckExact(self->data)
1874 );
1875 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001876 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001877 self->last->tail = JOIN_SET(
1878 self->data, PyList_CheckExact(self->data)
1879 );
1880 }
1881 self->data = NULL;
1882 }
1883
1884 if (self->index == 0) {
1885 PyErr_SetString(
1886 PyExc_IndexError,
1887 "pop from empty stack"
1888 );
1889 return NULL;
1890 }
1891
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001892 item = self->last;
1893 self->last = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001894 self->index--;
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001895 self->this = (ElementObject *) PyList_GET_ITEM(self->stack, self->index);
1896 Py_INCREF(self->this);
1897 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001898
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001899 if (treebuilder_append_event(self, self->end_event_obj, (PyObject*)self->last) < 0)
1900 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001901
1902 Py_INCREF(self->last);
1903 return (PyObject*) self->last;
1904}
1905
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001906/* -------------------------------------------------------------------- */
1907/* methods (in alphabetical order) */
1908
1909static PyObject*
1910treebuilder_data(TreeBuilderObject* self, PyObject* args)
1911{
1912 PyObject* data;
1913 if (!PyArg_ParseTuple(args, "O:data", &data))
1914 return NULL;
1915
1916 return treebuilder_handle_data(self, data);
1917}
1918
1919static PyObject*
1920treebuilder_end(TreeBuilderObject* self, PyObject* args)
1921{
1922 PyObject* tag;
1923 if (!PyArg_ParseTuple(args, "O:end", &tag))
1924 return NULL;
1925
1926 return treebuilder_handle_end(self, tag);
1927}
1928
1929LOCAL(PyObject*)
1930treebuilder_done(TreeBuilderObject* self)
1931{
1932 PyObject* res;
1933
1934 /* FIXME: check stack size? */
1935
1936 if (self->root)
1937 res = self->root;
1938 else
1939 res = Py_None;
1940
1941 Py_INCREF(res);
1942 return res;
1943}
1944
1945static PyObject*
1946treebuilder_close(TreeBuilderObject* self, PyObject* args)
1947{
1948 if (!PyArg_ParseTuple(args, ":close"))
1949 return NULL;
1950
1951 return treebuilder_done(self);
1952}
1953
1954static PyObject*
1955treebuilder_start(TreeBuilderObject* self, PyObject* args)
1956{
1957 PyObject* tag;
1958 PyObject* attrib = Py_None;
1959 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1960 return NULL;
1961
1962 return treebuilder_handle_start(self, tag, attrib);
1963}
1964
1965static PyObject*
1966treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1967{
1968 PyObject* encoding;
1969 PyObject* standalone;
1970 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1971 return NULL;
1972
1973 return treebuilder_handle_xml(self, encoding, standalone);
1974}
1975
1976static PyMethodDef treebuilder_methods[] = {
1977 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1978 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1979 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1980 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1981 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1982 {NULL, NULL}
1983};
1984
1985static PyObject*
1986treebuilder_getattr(TreeBuilderObject* self, char* name)
1987{
1988 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1989}
1990
1991statichere PyTypeObject TreeBuilder_Type = {
1992 PyObject_HEAD_INIT(NULL)
1993 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
1994 /* methods */
1995 (destructor)treebuilder_dealloc, /* tp_dealloc */
1996 0, /* tp_print */
1997 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1998};
1999
2000/* ==================================================================== */
2001/* the expat interface */
2002
2003#if defined(USE_EXPAT)
2004
2005#include "expat.h"
2006
/* EXPAT(func) names an expat entry point: a member of the pyexpat C API
   table when USE_PYEXPAT_CAPI is defined, otherwise the XML_-prefixed
   function itself. */
#ifdef USE_PYEXPAT_CAPI
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
2014
2015typedef struct {
2016 PyObject_HEAD
2017
2018 XML_Parser parser;
2019
2020 PyObject* target;
2021 PyObject* entity;
2022
2023 PyObject* names;
2024
2025 PyObject* handle_xml;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002026
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002027 PyObject* handle_start;
2028 PyObject* handle_data;
2029 PyObject* handle_end;
2030
2031 PyObject* handle_comment;
2032 PyObject* handle_pi;
2033
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002034 PyObject* handle_close;
2035
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002036} XMLParserObject;
2037
2038staticforward PyTypeObject XMLParser_Type;
2039
2040/* helpers */
2041
#if defined(Py_USING_UNICODE)
LOCAL(int)
checkstring(const char* string, int size)
{
    /* return 1 if any of the first size bytes has its high bit set
       (i.e. the 8-bit string contains UTF-8 characters), else 0 */
    const char* cursor = string;
    int remaining;

    for (remaining = size; remaining > 0; remaining--, cursor++) {
        if (*cursor & 0x80)
            return 1;
    }

    return 0;
}
#endif
2056
2057LOCAL(PyObject*)
2058makestring(const char* string, int size)
2059{
2060 /* convert a UTF-8 string to either a 7-bit ascii string or a
2061 Unicode string */
2062
2063#if defined(Py_USING_UNICODE)
2064 if (checkstring(string, size))
2065 return PyUnicode_DecodeUTF8(string, size, "strict");
2066#endif
2067
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002068 return PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002069}
2070
2071LOCAL(PyObject*)
2072makeuniversal(XMLParserObject* self, const char* string)
2073{
2074 /* convert a UTF-8 tag/attribute name from the expat parser
2075 to a universal name string */
2076
2077 int size = strlen(string);
2078 PyObject* key;
2079 PyObject* value;
2080
2081 /* look the 'raw' name up in the names dictionary */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002082 key = PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002083 if (!key)
2084 return NULL;
2085
2086 value = PyDict_GetItem(self->names, key);
2087
2088 if (value) {
2089 Py_INCREF(value);
2090 } else {
2091 /* new name. convert to universal name, and decode as
2092 necessary */
2093
2094 PyObject* tag;
2095 char* p;
2096 int i;
2097
2098 /* look for namespace separator */
2099 for (i = 0; i < size; i++)
2100 if (string[i] == '}')
2101 break;
2102 if (i != size) {
2103 /* convert to universal name */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002104 tag = PyString_FromStringAndSize(NULL, size+1);
2105 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002106 p[0] = '{';
2107 memcpy(p+1, string, size);
2108 size++;
2109 } else {
2110 /* plain name; use key as tag */
2111 Py_INCREF(key);
2112 tag = key;
2113 }
2114
2115 /* decode universal name */
2116#if defined(Py_USING_UNICODE)
2117 /* inline makestring, to avoid duplicating the source string if
Martin Panter6a8163a2016-04-15 02:14:19 +00002118 it's not a utf-8 string */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002119 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002120 if (checkstring(p, size)) {
2121 value = PyUnicode_DecodeUTF8(p, size, "strict");
2122 Py_DECREF(tag);
2123 if (!value) {
2124 Py_DECREF(key);
2125 return NULL;
2126 }
2127 } else
2128#endif
2129 value = tag; /* use tag as is */
2130
2131 /* add to names dictionary */
2132 if (PyDict_SetItem(self->names, key, value) < 0) {
2133 Py_DECREF(key);
2134 Py_DECREF(value);
2135 return NULL;
2136 }
2137 }
2138
2139 Py_DECREF(key);
2140 return value;
2141}
2142
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002143static void
2144expat_set_error(const char* message, int line, int column)
2145{
2146 PyObject *error;
2147 PyObject *position;
2148 char buffer[256];
2149
2150 sprintf(buffer, "%s: line %d, column %d", message, line, column);
2151
2152 error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
2153 if (!error)
2154 return;
2155
2156 /* add position attribute */
2157 position = Py_BuildValue("(ii)", line, column);
2158 if (!position) {
2159 Py_DECREF(error);
2160 return;
2161 }
2162 if (PyObject_SetAttrString(error, "position", position) == -1) {
2163 Py_DECREF(error);
2164 Py_DECREF(position);
2165 return;
2166 }
2167 Py_DECREF(position);
2168
2169 PyErr_SetObject(elementtree_parseerror_obj, error);
2170 Py_DECREF(error);
2171}
2172
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002173/* -------------------------------------------------------------------- */
2174/* handlers */
2175
2176static void
2177expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2178 int data_len)
2179{
2180 PyObject* key;
2181 PyObject* value;
2182 PyObject* res;
2183
2184 if (data_len < 2 || data_in[0] != '&')
2185 return;
2186
2187 key = makestring(data_in + 1, data_len - 2);
2188 if (!key)
2189 return;
2190
2191 value = PyDict_GetItem(self->entity, key);
2192
2193 if (value) {
2194 if (TreeBuilder_CheckExact(self->target))
2195 res = treebuilder_handle_data(
2196 (TreeBuilderObject*) self->target, value
2197 );
2198 else if (self->handle_data)
2199 res = PyObject_CallFunction(self->handle_data, "O", value);
2200 else
2201 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002202 Py_XDECREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002203 } else if (!PyErr_Occurred()) {
2204 /* Report the first error, not the last */
2205 char message[128];
2206 sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key));
2207 expat_set_error(
2208 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002209 EXPAT(GetErrorLineNumber)(self->parser),
2210 EXPAT(GetErrorColumnNumber)(self->parser)
2211 );
2212 }
2213
2214 Py_DECREF(key);
2215}
2216
2217static void
2218expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2219 const XML_Char **attrib_in)
2220{
2221 PyObject* res;
2222 PyObject* tag;
2223 PyObject* attrib;
2224 int ok;
2225
2226 /* tag name */
2227 tag = makeuniversal(self, tag_in);
2228 if (!tag)
2229 return; /* parser will look for errors */
2230
2231 /* attributes */
2232 if (attrib_in[0]) {
2233 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002234 if (!attrib) {
2235 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002236 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002237 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002238 while (attrib_in[0] && attrib_in[1]) {
2239 PyObject* key = makeuniversal(self, attrib_in[0]);
2240 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2241 if (!key || !value) {
2242 Py_XDECREF(value);
2243 Py_XDECREF(key);
2244 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002245 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002246 return;
2247 }
2248 ok = PyDict_SetItem(attrib, key, value);
2249 Py_DECREF(value);
2250 Py_DECREF(key);
2251 if (ok < 0) {
2252 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002253 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002254 return;
2255 }
2256 attrib_in += 2;
2257 }
2258 } else {
2259 Py_INCREF(Py_None);
2260 attrib = Py_None;
2261 }
2262
2263 if (TreeBuilder_CheckExact(self->target))
2264 /* shortcut */
2265 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2266 tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002267 else if (self->handle_start) {
2268 if (attrib == Py_None) {
2269 Py_DECREF(attrib);
2270 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002271 if (!attrib) {
2272 Py_DECREF(tag);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002273 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002274 }
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002275 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002276 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002277 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002278 res = NULL;
2279
2280 Py_DECREF(tag);
2281 Py_DECREF(attrib);
2282
2283 Py_XDECREF(res);
2284}
2285
2286static void
2287expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2288 int data_len)
2289{
2290 PyObject* data;
2291 PyObject* res;
2292
2293 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002294 if (!data)
2295 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002296
2297 if (TreeBuilder_CheckExact(self->target))
2298 /* shortcut */
2299 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2300 else if (self->handle_data)
2301 res = PyObject_CallFunction(self->handle_data, "O", data);
2302 else
2303 res = NULL;
2304
2305 Py_DECREF(data);
2306
2307 Py_XDECREF(res);
2308}
2309
2310static void
2311expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2312{
2313 PyObject* tag;
2314 PyObject* res = NULL;
2315
2316 if (TreeBuilder_CheckExact(self->target))
2317 /* shortcut */
2318 /* the standard tree builder doesn't look at the end tag */
2319 res = treebuilder_handle_end(
2320 (TreeBuilderObject*) self->target, Py_None
2321 );
2322 else if (self->handle_end) {
2323 tag = makeuniversal(self, tag_in);
2324 if (tag) {
2325 res = PyObject_CallFunction(self->handle_end, "O", tag);
2326 Py_DECREF(tag);
2327 }
2328 }
2329
2330 Py_XDECREF(res);
2331}
2332
2333static void
2334expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2335 const XML_Char *uri)
2336{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002337 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2338 PyObject *parcel;
2339 PyObject *sprefix = NULL;
2340 PyObject *suri = NULL;
2341
2342 if (PyErr_Occurred())
2343 return;
2344
2345 if (!target->events || !target->start_ns_event_obj)
2346 return;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002347
Eli Benderskyf933e082013-11-28 06:25:45 -08002348 if (uri)
Eli Bendersky71142c42013-11-28 06:37:25 -08002349 suri = makestring(uri, strlen(uri));
Eli Benderskyf933e082013-11-28 06:25:45 -08002350 else
Eli Bendersky71142c42013-11-28 06:37:25 -08002351 suri = PyString_FromStringAndSize("", 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002352 if (!suri)
2353 return;
2354
2355 if (prefix)
2356 sprefix = makestring(prefix, strlen(prefix));
2357 else
2358 sprefix = PyString_FromStringAndSize("", 0);
2359 if (!sprefix) {
2360 Py_DECREF(suri);
2361 return;
2362 }
2363
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002364 parcel = PyTuple_Pack(2, sprefix, suri);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002365 Py_DECREF(sprefix);
2366 Py_DECREF(suri);
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002367 if (!parcel)
2368 return;
2369 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
2370 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002371}
2372
2373static void
2374expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2375{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002376 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2377
2378 if (PyErr_Occurred())
2379 return;
2380
2381 if (!target->events)
2382 return;
2383
2384 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002385}
2386
2387static void
2388expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2389{
2390 PyObject* comment;
2391 PyObject* res;
2392
2393 if (self->handle_comment) {
2394 comment = makestring(comment_in, strlen(comment_in));
2395 if (comment) {
2396 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2397 Py_XDECREF(res);
2398 Py_DECREF(comment);
2399 }
2400 }
2401}
2402
2403static void
2404expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2405 const XML_Char* data_in)
2406{
2407 PyObject* target;
2408 PyObject* data;
2409 PyObject* res;
2410
2411 if (self->handle_pi) {
2412 target = makestring(target_in, strlen(target_in));
2413 data = makestring(data_in, strlen(data_in));
2414 if (target && data) {
2415 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2416 Py_XDECREF(res);
2417 Py_DECREF(data);
2418 Py_DECREF(target);
2419 } else {
2420 Py_XDECREF(data);
2421 Py_XDECREF(target);
2422 }
2423 }
2424}
2425
#if defined(Py_USING_UNICODE)
/* expat unknown-encoding handler: fill info->map by decoding the 256
   byte values with the Python codec called name.  Bytes the codec
   replaces are mapped to -1.  Returns XML_STATUS_ERROR when the codec
   fails or does not yield exactly 256 characters. */
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    unsigned char bytes[256];
    PyObject* decoded;
    Py_UNICODE* chars;
    int code;

    memset(info, 0, sizeof(XML_Encoding));

    /* one byte for every possible value, in order */
    for (code = 0; code < 256; code++)
        bytes[code] = code;

    decoded = PyUnicode_Decode((char*) bytes, 256, name, "replace");
    if (decoded == NULL)
        return XML_STATUS_ERROR;

    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        PyErr_SetString(PyExc_ValueError,
                        "multi-byte encodings are not supported");
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (code = 0; code < 256; code++) {
        if (chars[code] == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[code] = -1;
        else
            info->map[code] = chars[code];
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2466
2467/* -------------------------------------------------------------------- */
2468/* constructor and destructor */
2469
2470static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00002471xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002472{
2473 XMLParserObject* self;
2474 /* FIXME: does this need to be static? */
2475 static XML_Memory_Handling_Suite memory_handler;
2476
2477 PyObject* target = NULL;
2478 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002479 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002480 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2481 &target, &encoding))
2482 return NULL;
2483
2484#if defined(USE_PYEXPAT_CAPI)
2485 if (!expat_capi) {
2486 PyErr_SetString(
2487 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2488 );
2489 return NULL;
2490 }
2491#endif
2492
2493 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2494 if (self == NULL)
2495 return NULL;
2496
2497 self->entity = PyDict_New();
2498 if (!self->entity) {
2499 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002500 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002501 }
2502
2503 self->names = PyDict_New();
2504 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002505 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002506 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002507 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002508 }
2509
2510 memory_handler.malloc_fcn = PyObject_Malloc;
2511 memory_handler.realloc_fcn = PyObject_Realloc;
2512 memory_handler.free_fcn = PyObject_Free;
2513
2514 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2515 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002516 PyObject_Del(self->names);
2517 PyObject_Del(self->entity);
2518 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002520 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521 }
2522
2523 /* setup target handlers */
2524 if (!target) {
2525 target = treebuilder_new();
2526 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002527 EXPAT(ParserFree)(self->parser);
2528 PyObject_Del(self->names);
2529 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002530 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002531 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002532 }
2533 } else
2534 Py_INCREF(target);
2535 self->target = target;
2536
2537 self->handle_xml = PyObject_GetAttrString(target, "xml");
2538 self->handle_start = PyObject_GetAttrString(target, "start");
2539 self->handle_data = PyObject_GetAttrString(target, "data");
2540 self->handle_end = PyObject_GetAttrString(target, "end");
2541 self->handle_comment = PyObject_GetAttrString(target, "comment");
2542 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002543 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544
2545 PyErr_Clear();
2546
2547 /* configure parser */
2548 EXPAT(SetUserData)(self->parser, self);
2549 EXPAT(SetElementHandler)(
2550 self->parser,
2551 (XML_StartElementHandler) expat_start_handler,
2552 (XML_EndElementHandler) expat_end_handler
2553 );
2554 EXPAT(SetDefaultHandlerExpand)(
2555 self->parser,
2556 (XML_DefaultHandler) expat_default_handler
2557 );
2558 EXPAT(SetCharacterDataHandler)(
2559 self->parser,
2560 (XML_CharacterDataHandler) expat_data_handler
2561 );
2562 if (self->handle_comment)
2563 EXPAT(SetCommentHandler)(
2564 self->parser,
2565 (XML_CommentHandler) expat_comment_handler
2566 );
2567 if (self->handle_pi)
2568 EXPAT(SetProcessingInstructionHandler)(
2569 self->parser,
2570 (XML_ProcessingInstructionHandler) expat_pi_handler
2571 );
2572#if defined(Py_USING_UNICODE)
2573 EXPAT(SetUnknownEncodingHandler)(
2574 self->parser,
2575 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2576 );
2577#endif
2578
2579 ALLOC(sizeof(XMLParserObject), "create expatparser");
2580
2581 return (PyObject*) self;
2582}
2583
2584static void
2585xmlparser_dealloc(XMLParserObject* self)
2586{
2587 EXPAT(ParserFree)(self->parser);
2588
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002589 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002590 Py_XDECREF(self->handle_pi);
2591 Py_XDECREF(self->handle_comment);
2592 Py_XDECREF(self->handle_end);
2593 Py_XDECREF(self->handle_data);
2594 Py_XDECREF(self->handle_start);
2595 Py_XDECREF(self->handle_xml);
2596
2597 Py_DECREF(self->target);
2598 Py_DECREF(self->entity);
2599 Py_DECREF(self->names);
2600
2601 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2602
2603 PyObject_Del(self);
2604}
2605
2606/* -------------------------------------------------------------------- */
2607/* methods (in alphabetical order) */
2608
2609LOCAL(PyObject*)
2610expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2611{
2612 int ok;
2613
2614 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2615
2616 if (PyErr_Occurred())
2617 return NULL;
2618
2619 if (!ok) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002620 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002621 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2622 EXPAT(GetErrorLineNumber)(self->parser),
2623 EXPAT(GetErrorColumnNumber)(self->parser)
2624 );
2625 return NULL;
2626 }
2627
2628 Py_RETURN_NONE;
2629}
2630
2631static PyObject*
2632xmlparser_close(XMLParserObject* self, PyObject* args)
2633{
2634 /* end feeding data to parser */
2635
2636 PyObject* res;
2637 if (!PyArg_ParseTuple(args, ":close"))
2638 return NULL;
2639
2640 res = expat_parse(self, "", 0, 1);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002641 if (!res)
2642 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002643
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002644 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002645 Py_DECREF(res);
2646 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002647 } if (self->handle_close) {
2648 Py_DECREF(res);
2649 return PyObject_CallFunction(self->handle_close, "");
2650 } else
2651 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002652}
2653
2654static PyObject*
2655xmlparser_feed(XMLParserObject* self, PyObject* args)
2656{
2657 /* feed data to parser */
2658
2659 char* data;
2660 int data_len;
2661 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2662 return NULL;
2663
2664 return expat_parse(self, data, data_len, 0);
2665}
2666
2667static PyObject*
2668xmlparser_parse(XMLParserObject* self, PyObject* args)
2669{
2670 /* (internal) parse until end of input stream */
2671
2672 PyObject* reader;
2673 PyObject* buffer;
2674 PyObject* res;
2675
2676 PyObject* fileobj;
2677 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2678 return NULL;
2679
2680 reader = PyObject_GetAttrString(fileobj, "read");
2681 if (!reader)
2682 return NULL;
2683
2684 /* read from open file object */
2685 for (;;) {
2686
2687 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2688
2689 if (!buffer) {
2690 /* read failed (e.g. due to KeyboardInterrupt) */
2691 Py_DECREF(reader);
2692 return NULL;
2693 }
2694
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002695 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002696 Py_DECREF(buffer);
2697 break;
2698 }
2699
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002700 if (PyString_GET_SIZE(buffer) > INT_MAX) {
2701 Py_DECREF(buffer);
2702 Py_DECREF(reader);
2703 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
2704 return NULL;
2705 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706 res = expat_parse(
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002707 self, PyString_AS_STRING(buffer), (int)PyString_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002708 );
2709
2710 Py_DECREF(buffer);
2711
2712 if (!res) {
2713 Py_DECREF(reader);
2714 return NULL;
2715 }
2716 Py_DECREF(res);
2717
2718 }
2719
2720 Py_DECREF(reader);
2721
2722 res = expat_parse(self, "", 0, 1);
2723
2724 if (res && TreeBuilder_CheckExact(self->target)) {
2725 Py_DECREF(res);
2726 return treebuilder_done((TreeBuilderObject*) self->target);
2727 }
2728
2729 return res;
2730}
2731
2732static PyObject*
2733xmlparser_setevents(XMLParserObject* self, PyObject* args)
2734{
2735 /* activate element event reporting */
2736
Neal Norwitzc7074382006-06-12 02:06:17 +00002737 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738 TreeBuilderObject* target;
2739
2740 PyObject* events; /* event collector */
2741 PyObject* event_set = Py_None;
2742 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2743 &event_set))
2744 return NULL;
2745
2746 if (!TreeBuilder_CheckExact(self->target)) {
2747 PyErr_SetString(
2748 PyExc_TypeError,
2749 "event handling only supported for cElementTree.Treebuilder "
2750 "targets"
2751 );
2752 return NULL;
2753 }
2754
2755 target = (TreeBuilderObject*) self->target;
2756
2757 Py_INCREF(events);
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002758 Py_XSETREF(target->events, events);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002759
2760 /* clear out existing events */
Serhiy Storchaka98a97222014-02-09 13:14:04 +02002761 Py_CLEAR(target->start_event_obj);
2762 Py_CLEAR(target->end_event_obj);
2763 Py_CLEAR(target->start_ns_event_obj);
2764 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002765
2766 if (event_set == Py_None) {
2767 /* default is "end" only */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002768 target->end_event_obj = PyString_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002769 Py_RETURN_NONE;
2770 }
2771
2772 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2773 goto error;
2774
2775 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2776 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2777 char* event;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002778 if (!PyString_Check(item))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002779 goto error;
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002780 Py_INCREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002781 event = PyString_AS_STRING(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002782 if (strcmp(event, "start") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002783 Py_XSETREF(target->start_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002784 } else if (strcmp(event, "end") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002785 Py_XSETREF(target->end_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002786 } else if (strcmp(event, "start-ns") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002787 Py_XSETREF(target->start_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002788 EXPAT(SetNamespaceDeclHandler)(
2789 self->parser,
2790 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2791 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2792 );
2793 } else if (strcmp(event, "end-ns") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002794 Py_XSETREF(target->end_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002795 EXPAT(SetNamespaceDeclHandler)(
2796 self->parser,
2797 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2798 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2799 );
2800 } else {
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002801 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002802 PyErr_Format(
2803 PyExc_ValueError,
2804 "unknown event '%s'", event
2805 );
2806 return NULL;
2807 }
2808 }
2809
2810 Py_RETURN_NONE;
2811
2812 error:
2813 PyErr_SetString(
2814 PyExc_TypeError,
2815 "invalid event tuple"
2816 );
2817 return NULL;
2818}
2819
2820static PyMethodDef xmlparser_methods[] = {
2821 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2822 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2823 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2824 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2825 {NULL, NULL}
2826};
2827
2828static PyObject*
2829xmlparser_getattr(XMLParserObject* self, char* name)
2830{
2831 PyObject* res;
2832
2833 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2834 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002835 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002836
2837 PyErr_Clear();
2838
2839 if (strcmp(name, "entity") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002840 res = self->entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002841 else if (strcmp(name, "target") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002842 res = self->target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002843 else if (strcmp(name, "version") == 0) {
2844 char buffer[100];
2845 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2846 XML_MINOR_VERSION, XML_MICRO_VERSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002847 return PyString_FromString(buffer);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002848 } else {
2849 PyErr_SetString(PyExc_AttributeError, name);
2850 return NULL;
2851 }
2852
2853 Py_INCREF(res);
2854 return res;
2855}
2856
2857statichere PyTypeObject XMLParser_Type = {
2858 PyObject_HEAD_INIT(NULL)
2859 0, "XMLParser", sizeof(XMLParserObject), 0,
2860 /* methods */
2861 (destructor)xmlparser_dealloc, /* tp_dealloc */
2862 0, /* tp_print */
2863 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2864};
2865
2866#endif
2867
2868/* ==================================================================== */
2869/* python module interface */
2870
2871static PyMethodDef _functions[] = {
2872 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2873 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2874 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2875#if defined(USE_EXPAT)
2876 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2877 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2878#endif
2879 {NULL, NULL}
2880};
2881
2882DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002883init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002884{
2885 PyObject* m;
2886 PyObject* g;
2887 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002888
2889 /* Patch object type */
Christian Heimese93237d2007-12-19 02:37:44 +00002890 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002891#if defined(USE_EXPAT)
Christian Heimese93237d2007-12-19 02:37:44 +00002892 Py_TYPE(&XMLParser_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002893#endif
2894
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002895 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002896 if (!m)
2897 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002898
2899 /* python glue code */
2900
2901 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002902 if (!g)
2903 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002904
2905 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2906
2907 bootstrap = (
2908
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002909 "from copy import copy, deepcopy\n"
2910
2911 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002912 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002913 "except ImportError:\n"
2914 " import ElementTree\n"
2915 "ET = ElementTree\n"
2916 "del ElementTree\n"
2917
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002918 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002919
2920 "try:\n" /* check if copy works as is */
2921 " copy(cElementTree.Element('x'))\n"
2922 "except:\n"
2923 " def copyelement(elem):\n"
2924 " return elem\n"
2925
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002926 "class CommentProxy:\n"
2927 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002928 " element = cElementTree.Element(ET.Comment)\n"
2929 " element.text = text\n"
2930 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002931 " def __cmp__(self, other):\n"
2932 " return cmp(ET.Comment, other)\n"
2933 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002934
2935 "class ElementTree(ET.ElementTree):\n" /* public */
2936 " def parse(self, source, parser=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002937 " close_source = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002938 " if not hasattr(source, 'read'):\n"
2939 " source = open(source, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002940 " close_source = False\n"
2941 " try:\n"
2942 " if parser is not None:\n"
2943 " while 1:\n"
2944 " data = source.read(65536)\n"
2945 " if not data:\n"
2946 " break\n"
2947 " parser.feed(data)\n"
2948 " self._root = parser.close()\n"
2949 " else:\n"
2950 " parser = cElementTree.XMLParser()\n"
2951 " self._root = parser._parse(source)\n"
2952 " return self._root\n"
2953 " finally:\n"
2954 " if close_source:\n"
2955 " source.close()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002956 "cElementTree.ElementTree = ElementTree\n"
2957
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002958 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002959 " if tag == '*':\n"
2960 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002961 " if tag is None or node.tag == tag:\n"
2962 " yield node\n"
2963 " for node in node:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002964 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002965 " yield node\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002966
2967 "def itertext(node):\n" /* helper */
2968 " if node.text:\n"
2969 " yield node.text\n"
2970 " for e in node:\n"
2971 " for s in e.itertext():\n"
2972 " yield s\n"
2973 " if e.tail:\n"
2974 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002975
2976 "def parse(source, parser=None):\n" /* public */
2977 " tree = ElementTree()\n"
2978 " tree.parse(source, parser)\n"
2979 " return tree\n"
2980 "cElementTree.parse = parse\n"
2981
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002982 "class iterparse(object):\n"
2983 " root = None\n"
2984 " def __init__(self, file, events=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002985 " self._close_file = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002986 " if not hasattr(file, 'read'):\n"
2987 " file = open(file, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002988 " self._close_file = True\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002989 " self._file = file\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002990 " self._events = []\n"
2991 " self._index = 0\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01002992 " self._error = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002993 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002994 " b = cElementTree.TreeBuilder()\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002995 " self._parser = cElementTree.XMLParser(b)\n"
2996 " self._parser._setevents(self._events, events)\n"
2997 " def next(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002998 " while 1:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002999 " try:\n"
3000 " item = self._events[self._index]\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003001 " self._index += 1\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003002 " return item\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003003 " except IndexError:\n"
3004 " pass\n"
3005 " if self._error:\n"
3006 " e = self._error\n"
3007 " self._error = None\n"
3008 " raise e\n"
3009 " if self._parser is None:\n"
3010 " self.root = self._root\n"
3011 " if self._close_file:\n"
3012 " self._file.close()\n"
3013 " raise StopIteration\n"
3014 " # load event buffer\n"
3015 " del self._events[:]\n"
3016 " self._index = 0\n"
3017 " data = self._file.read(16384)\n"
3018 " if data:\n"
3019 " try:\n"
3020 " self._parser.feed(data)\n"
3021 " except SyntaxError as exc:\n"
3022 " self._error = exc\n"
3023 " else:\n"
3024 " self._root = self._parser.close()\n"
3025 " self._parser = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003026 " def __iter__(self):\n"
3027 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003028 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003029
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003030 "class PIProxy:\n"
3031 " def __call__(self, target, text=None):\n"
3032 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003033 " element.text = target\n"
3034 " if text:\n"
3035 " element.text = element.text + ' ' + text\n"
3036 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003037 " def __cmp__(self, other):\n"
3038 " return cmp(ET.PI, other)\n"
3039 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003040
3041 "def XML(text):\n" /* public */
3042 " parser = cElementTree.XMLParser()\n"
3043 " parser.feed(text)\n"
3044 " return parser.close()\n"
3045 "cElementTree.XML = cElementTree.fromstring = XML\n"
3046
3047 "def XMLID(text):\n" /* public */
3048 " tree = XML(text)\n"
3049 " ids = {}\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003050 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003051 " id = elem.get('id')\n"
3052 " if id:\n"
3053 " ids[id] = elem\n"
3054 " return tree, ids\n"
3055 "cElementTree.XMLID = XMLID\n"
3056
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003057 "try:\n"
3058 " register_namespace = ET.register_namespace\n"
3059 "except AttributeError:\n"
3060 " def register_namespace(prefix, uri):\n"
3061 " ET._namespace_map[uri] = prefix\n"
3062 "cElementTree.register_namespace = register_namespace\n"
3063
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003064 "cElementTree.dump = ET.dump\n"
3065 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3066 "cElementTree.iselement = ET.iselement\n"
3067 "cElementTree.QName = ET.QName\n"
3068 "cElementTree.tostring = ET.tostring\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003069 "cElementTree.fromstringlist = ET.fromstringlist\n"
3070 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003071 "cElementTree.VERSION = '" VERSION "'\n"
3072 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003073
3074 );
3075
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003076 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3077 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003078
3079 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3080
3081 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3082 if (elementtree_copyelement_obj) {
3083 /* reduce hack needed; enable reduce method */
3084 PyMethodDef* mp;
3085 for (mp = element_methods; mp->ml_name; mp++)
3086 if (mp->ml_meth == (PyCFunction) element_reduce) {
3087 mp->ml_name = "__reduce__";
3088 break;
3089 }
3090 } else
3091 PyErr_Clear();
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003092
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003093 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003094 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3095 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003096
3097#if defined(USE_PYEXPAT_CAPI)
3098 /* link against pyexpat, if possible */
Larry Hastings402b73f2010-03-25 00:54:54 +00003099 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003100 if (expat_capi) {
3101 /* check that it's usable */
3102 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3103 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3104 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3105 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3106 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3107 expat_capi = NULL;
3108 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003109#endif
3110
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003111 elementtree_parseerror_obj = PyErr_NewException(
3112 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3113 );
3114 Py_INCREF(elementtree_parseerror_obj);
3115 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003116}