blob: df97b5e698fd4d3dfd2bf2ca22d008fcdee43f71 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xicluna3e8c1892010-03-11 14:36:19 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xicluna3e8c1892010-03-11 14:36:19 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xicluna3e8c1892010-03-11 14:36:19 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xicluna3e8c1892010-03-11 14:36:19 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
Fredrik Lundhdc075b92006-08-16 16:47:07 +000052#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000053
54/* -------------------------------------------------------------------- */
55/* configuration */
56
57/* Leave defined to include the expat-based XMLParser type */
58#define USE_EXPAT
59
Florent Xicluna3e8c1892010-03-11 14:36:19 +000060/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* #define USE_PYEXPAT_CAPI */
62
63/* An element can hold this many children without extra memory
64 allocations. */
65#define STATIC_CHILDREN 4
66
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
73 eight bytes. For the current version of cElementTree, this means
74 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
/* Allocation tracing hooks.  Flip the "#if 0" to "#if 1" to print a
   running byte total on every ALLOC/RELEASE; in the default build both
   macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
89
/* compiler tweaks: LOCAL(type) declares a file-local helper returning
   "type"; under MSVC it additionally requests inlining and the
   __fastcall calling convention */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
96
/* compatibility shims, selected by the Python version being built against */

/* before 2.6: supply the Py_REFCNT / Py_TYPE accessors */
#if (PY_VERSION_HEX < 0x02060000)
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* before 2.5: Py_ssize_t is a plain int and lenfunc maps to inquiry */
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

/* before 2.4: fall back to PyDict_Check and supply Py_RETURN_NONE */
#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check

#ifndef Py_RETURN_NONE
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000115
/* The text and tail slots are tagged pointers: the lowest bit of the
   stored pointer carries a 'join' flag.  Every use of text/tail as an
   object pointer must therefore go through JOIN_OBJ.  See the comments
   in the ElementObject definition for more info.

   JOIN_GET(p)        -> the flag bit (0 or 1)
   JOIN_OBJ(p)        -> the object pointer with the flag bit cleared
   JOIN_SET(p, flag)  -> p's object pointer combined with the given flag */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t) 1))
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
123
124/* glue functions (see the init function for details) */
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000125static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000128static PyObject* elementtree_iter_obj;
129static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130static PyObject* elementpath_obj;
131
132/* helpers */
133
134LOCAL(PyObject*)
135deepcopy(PyObject* object, PyObject* memo)
136{
137 /* do a deep copy of the given object */
138
139 PyObject* args;
140 PyObject* result;
141
142 if (!elementtree_deepcopy_obj) {
143 PyErr_SetString(
144 PyExc_RuntimeError,
145 "deepcopy helper not found"
146 );
147 return NULL;
148 }
149
150 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000151 if (!args)
152 return NULL;
153
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
155 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156
157 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158
159 Py_DECREF(args);
160
161 return result;
162}
163
164LOCAL(PyObject*)
165list_join(PyObject* list)
166{
167 /* join list elements (destroying the list in the process) */
168
169 PyObject* joiner;
170 PyObject* function;
171 PyObject* args;
172 PyObject* result;
173
174 switch (PyList_GET_SIZE(list)) {
175 case 0:
176 Py_DECREF(list);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000177 return PyString_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000178 case 1:
179 result = PyList_GET_ITEM(list, 0);
180 Py_INCREF(result);
181 Py_DECREF(list);
182 return result;
183 }
184
185 /* two or more elements: slice out a suitable separator from the
186 first member, and use that to join the entire list */
187
188 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
189 if (!joiner)
190 return NULL;
191
192 function = PyObject_GetAttrString(joiner, "join");
193 if (!function) {
194 Py_DECREF(joiner);
195 return NULL;
196 }
197
198 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000199 if (!args)
200 return NULL;
201
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202 PyTuple_SET_ITEM(args, 0, list);
203
204 result = PyObject_CallObject(function, args);
205
206 Py_DECREF(args); /* also removes list */
207 Py_DECREF(function);
208 Py_DECREF(joiner);
209
210 return result;
211}
212
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000213/* -------------------------------------------------------------------- */
214/* the element type */
215
216typedef struct {
217
218 /* attributes (a dictionary object), or None if no attributes */
219 PyObject* attrib;
220
221 /* child elements */
222 int length; /* actual number of items */
223 int allocated; /* allocated items */
224
225 /* this either points to _children or to a malloced buffer */
226 PyObject* *children;
227
228 PyObject* _children[STATIC_CHILDREN];
229
230} ElementObjectExtra;
231
232typedef struct {
233 PyObject_HEAD
234
235 /* element tag (a string). */
236 PyObject* tag;
237
238 /* text before first child. note that this is a tagged pointer;
239 use JOIN_OBJ to get the object pointer. the join flag is used
240 to distinguish lists created by the tree builder from lists
241 assigned to the attribute by application code; the former
242 should be joined before being returned to the user, the latter
243 should be left intact. */
244 PyObject* text;
245
246 /* text after this element, in parent. note that this is a tagged
247 pointer; use JOIN_OBJ to get the object pointer. */
248 PyObject* tail;
249
250 ElementObjectExtra* extra;
251
252} ElementObject;
253
254staticforward PyTypeObject Element_Type;
255
Christian Heimese93237d2007-12-19 02:37:44 +0000256#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000257
258/* -------------------------------------------------------------------- */
259/* element constructor and destructor */
260
261LOCAL(int)
262element_new_extra(ElementObject* self, PyObject* attrib)
263{
264 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
265 if (!self->extra)
266 return -1;
267
268 if (!attrib)
269 attrib = Py_None;
270
271 Py_INCREF(attrib);
272 self->extra->attrib = attrib;
273
274 self->extra->length = 0;
275 self->extra->allocated = STATIC_CHILDREN;
276 self->extra->children = self->extra->_children;
277
278 return 0;
279}
280
281LOCAL(void)
282element_dealloc_extra(ElementObject* self)
283{
284 int i;
285
286 Py_DECREF(self->extra->attrib);
287
288 for (i = 0; i < self->extra->length; i++)
289 Py_DECREF(self->extra->children[i]);
290
291 if (self->extra->children != self->extra->_children)
292 PyObject_Free(self->extra->children);
293
294 PyObject_Free(self->extra);
295}
296
297LOCAL(PyObject*)
298element_new(PyObject* tag, PyObject* attrib)
299{
300 ElementObject* self;
301
302 self = PyObject_New(ElementObject, &Element_Type);
303 if (self == NULL)
304 return NULL;
305
306 /* use None for empty dictionaries */
307 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
308 attrib = Py_None;
309
310 self->extra = NULL;
311
312 if (attrib != Py_None) {
313
Neal Norwitzc6a989a2006-05-10 06:57:58 +0000314 if (element_new_extra(self, attrib) < 0) {
315 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000316 return NULL;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000317 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000318
319 self->extra->length = 0;
320 self->extra->allocated = STATIC_CHILDREN;
321 self->extra->children = self->extra->_children;
322
323 }
324
325 Py_INCREF(tag);
326 self->tag = tag;
327
328 Py_INCREF(Py_None);
329 self->text = Py_None;
330
331 Py_INCREF(Py_None);
332 self->tail = Py_None;
333
334 ALLOC(sizeof(ElementObject), "create element");
335
336 return (PyObject*) self;
337}
338
339LOCAL(int)
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200340element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000341{
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200342 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000343 PyObject* *children;
344
345 /* make sure self->children can hold the given number of extra
346 elements. set an exception and return -1 if allocation failed */
347
348 if (!self->extra)
349 element_new_extra(self, NULL);
350
351 size = self->extra->length + extra;
352
353 if (size > self->extra->allocated) {
354 /* use Python 2.4's list growth strategy */
355 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000356 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
357 * which needs at least 4 bytes.
358 * Although it's a false alarm always assume at least one child to
359 * be safe.
360 */
361 size = size ? size : 1;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200362 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
363 goto nomemory;
364 if (size > INT_MAX) {
365 PyErr_SetString(PyExc_OverflowError,
366 "too many children");
367 return -1;
368 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000369 if (self->extra->children != self->extra->_children) {
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000370 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
371 * "children", which needs at least 4 bytes. Although it's a
372 * false alarm always assume at least one child to be safe.
373 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000374 children = PyObject_Realloc(self->extra->children,
375 size * sizeof(PyObject*));
376 if (!children)
377 goto nomemory;
378 } else {
379 children = PyObject_Malloc(size * sizeof(PyObject*));
380 if (!children)
381 goto nomemory;
382 /* copy existing children from static area to malloc buffer */
383 memcpy(children, self->extra->children,
384 self->extra->length * sizeof(PyObject*));
385 }
386 self->extra->children = children;
387 self->extra->allocated = size;
388 }
389
390 return 0;
391
392 nomemory:
393 PyErr_NoMemory();
394 return -1;
395}
396
397LOCAL(int)
398element_add_subelement(ElementObject* self, PyObject* element)
399{
400 /* add a child element to a parent */
401
402 if (element_resize(self, 1) < 0)
403 return -1;
404
405 Py_INCREF(element);
406 self->extra->children[self->extra->length] = element;
407
408 self->extra->length++;
409
410 return 0;
411}
412
413LOCAL(PyObject*)
414element_get_attrib(ElementObject* self)
415{
416 /* return borrowed reference to attrib dictionary */
417 /* note: this function assumes that the extra section exists */
418
419 PyObject* res = self->extra->attrib;
420
421 if (res == Py_None) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000422 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000423 /* create missing dictionary */
424 res = PyDict_New();
425 if (!res)
426 return NULL;
427 self->extra->attrib = res;
428 }
429
430 return res;
431}
432
433LOCAL(PyObject*)
434element_get_text(ElementObject* self)
435{
436 /* return borrowed reference to text attribute */
437
438 PyObject* res = self->text;
439
440 if (JOIN_GET(res)) {
441 res = JOIN_OBJ(res);
442 if (PyList_CheckExact(res)) {
443 res = list_join(res);
444 if (!res)
445 return NULL;
446 self->text = res;
447 }
448 }
449
450 return res;
451}
452
453LOCAL(PyObject*)
454element_get_tail(ElementObject* self)
455{
456 /* return borrowed reference to text attribute */
457
458 PyObject* res = self->tail;
459
460 if (JOIN_GET(res)) {
461 res = JOIN_OBJ(res);
462 if (PyList_CheckExact(res)) {
463 res = list_join(res);
464 if (!res)
465 return NULL;
466 self->tail = res;
467 }
468 }
469
470 return res;
471}
472
473static PyObject*
474element(PyObject* self, PyObject* args, PyObject* kw)
475{
476 PyObject* elem;
477
478 PyObject* tag;
479 PyObject* attrib = NULL;
480 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
481 &PyDict_Type, &attrib))
482 return NULL;
483
484 if (attrib || kw) {
485 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
486 if (!attrib)
487 return NULL;
488 if (kw)
489 PyDict_Update(attrib, kw);
490 } else {
491 Py_INCREF(Py_None);
492 attrib = Py_None;
493 }
494
495 elem = element_new(tag, attrib);
496
497 Py_DECREF(attrib);
498
499 return elem;
500}
501
502static PyObject*
503subelement(PyObject* self, PyObject* args, PyObject* kw)
504{
505 PyObject* elem;
506
507 ElementObject* parent;
508 PyObject* tag;
509 PyObject* attrib = NULL;
510 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
511 &Element_Type, &parent, &tag,
512 &PyDict_Type, &attrib))
513 return NULL;
514
515 if (attrib || kw) {
516 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
517 if (!attrib)
518 return NULL;
519 if (kw)
520 PyDict_Update(attrib, kw);
521 } else {
522 Py_INCREF(Py_None);
523 attrib = Py_None;
524 }
525
526 elem = element_new(tag, attrib);
527
528 Py_DECREF(attrib);
529
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000530 if (element_add_subelement(parent, elem) < 0) {
531 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000532 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000533 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000534
535 return elem;
536}
537
538static void
539element_dealloc(ElementObject* self)
540{
541 if (self->extra)
542 element_dealloc_extra(self);
543
544 /* discard attributes */
545 Py_DECREF(self->tag);
546 Py_DECREF(JOIN_OBJ(self->text));
547 Py_DECREF(JOIN_OBJ(self->tail));
548
549 RELEASE(sizeof(ElementObject), "destroy element");
550
551 PyObject_Del(self);
552}
553
554/* -------------------------------------------------------------------- */
555/* methods (in alphabetical order) */
556
557static PyObject*
558element_append(ElementObject* self, PyObject* args)
559{
560 PyObject* element;
561 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
562 return NULL;
563
564 if (element_add_subelement(self, element) < 0)
565 return NULL;
566
567 Py_RETURN_NONE;
568}
569
570static PyObject*
571element_clear(ElementObject* self, PyObject* args)
572{
573 if (!PyArg_ParseTuple(args, ":clear"))
574 return NULL;
575
576 if (self->extra) {
577 element_dealloc_extra(self);
578 self->extra = NULL;
579 }
580
581 Py_INCREF(Py_None);
582 Py_DECREF(JOIN_OBJ(self->text));
583 self->text = Py_None;
584
585 Py_INCREF(Py_None);
586 Py_DECREF(JOIN_OBJ(self->tail));
587 self->tail = Py_None;
588
589 Py_RETURN_NONE;
590}
591
592static PyObject*
593element_copy(ElementObject* self, PyObject* args)
594{
595 int i;
596 ElementObject* element;
597
598 if (!PyArg_ParseTuple(args, ":__copy__"))
599 return NULL;
600
601 element = (ElementObject*) element_new(
602 self->tag, (self->extra) ? self->extra->attrib : Py_None
603 );
604 if (!element)
605 return NULL;
606
607 Py_DECREF(JOIN_OBJ(element->text));
608 element->text = self->text;
609 Py_INCREF(JOIN_OBJ(element->text));
610
611 Py_DECREF(JOIN_OBJ(element->tail));
612 element->tail = self->tail;
613 Py_INCREF(JOIN_OBJ(element->tail));
614
615 if (self->extra) {
616
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000617 if (element_resize(element, self->extra->length) < 0) {
618 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000619 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000620 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000621
622 for (i = 0; i < self->extra->length; i++) {
623 Py_INCREF(self->extra->children[i]);
624 element->extra->children[i] = self->extra->children[i];
625 }
626
627 element->extra->length = self->extra->length;
628
629 }
630
631 return (PyObject*) element;
632}
633
634static PyObject*
635element_deepcopy(ElementObject* self, PyObject* args)
636{
637 int i;
638 ElementObject* element;
639 PyObject* tag;
640 PyObject* attrib;
641 PyObject* text;
642 PyObject* tail;
643 PyObject* id;
644
645 PyObject* memo;
646 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
647 return NULL;
648
649 tag = deepcopy(self->tag, memo);
650 if (!tag)
651 return NULL;
652
653 if (self->extra) {
654 attrib = deepcopy(self->extra->attrib, memo);
655 if (!attrib) {
656 Py_DECREF(tag);
657 return NULL;
658 }
659 } else {
660 Py_INCREF(Py_None);
661 attrib = Py_None;
662 }
663
664 element = (ElementObject*) element_new(tag, attrib);
665
666 Py_DECREF(tag);
667 Py_DECREF(attrib);
668
669 if (!element)
670 return NULL;
671
672 text = deepcopy(JOIN_OBJ(self->text), memo);
673 if (!text)
674 goto error;
675 Py_DECREF(element->text);
676 element->text = JOIN_SET(text, JOIN_GET(self->text));
677
678 tail = deepcopy(JOIN_OBJ(self->tail), memo);
679 if (!tail)
680 goto error;
681 Py_DECREF(element->tail);
682 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
683
684 if (self->extra) {
685
686 if (element_resize(element, self->extra->length) < 0)
687 goto error;
688
689 for (i = 0; i < self->extra->length; i++) {
690 PyObject* child = deepcopy(self->extra->children[i], memo);
691 if (!child) {
692 element->extra->length = i;
693 goto error;
694 }
695 element->extra->children[i] = child;
696 }
697
698 element->extra->length = self->extra->length;
699
700 }
701
702 /* add object to memo dictionary (so deepcopy won't visit it again) */
703 id = PyInt_FromLong((Py_uintptr_t) self);
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000704 if (!id)
705 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000706
707 i = PyDict_SetItem(memo, id, (PyObject*) element);
708
709 Py_DECREF(id);
710
711 if (i < 0)
712 goto error;
713
714 return (PyObject*) element;
715
716 error:
717 Py_DECREF(element);
718 return NULL;
719}
720
721LOCAL(int)
722checkpath(PyObject* tag)
723{
Neal Norwitzc7074382006-06-12 02:06:17 +0000724 Py_ssize_t i;
725 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000726
727 /* check if a tag contains an xpath character */
728
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000729#define PATHCHAR(ch) \
730 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000731
732#if defined(Py_USING_UNICODE)
733 if (PyUnicode_Check(tag)) {
734 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
735 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
736 if (p[i] == '{')
737 check = 0;
738 else if (p[i] == '}')
739 check = 1;
740 else if (check && PATHCHAR(p[i]))
741 return 1;
742 }
743 return 0;
744 }
745#endif
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000746 if (PyString_Check(tag)) {
747 char *p = PyString_AS_STRING(tag);
748 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000749 if (p[i] == '{')
750 check = 0;
751 else if (p[i] == '}')
752 check = 1;
753 else if (check && PATHCHAR(p[i]))
754 return 1;
755 }
756 return 0;
757 }
758
759 return 1; /* unknown type; might be path expression */
760}
761
762static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000763element_extend(ElementObject* self, PyObject* args)
764{
765 PyObject* seq;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300766 Py_ssize_t i;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000767
768 PyObject* seq_in;
769 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
770 return NULL;
771
772 seq = PySequence_Fast(seq_in, "");
773 if (!seq) {
774 PyErr_Format(
775 PyExc_TypeError,
776 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
777 );
778 return NULL;
779 }
780
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300781 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000782 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
783 if (element_add_subelement(self, element) < 0) {
784 Py_DECREF(seq);
785 return NULL;
786 }
787 }
788
789 Py_DECREF(seq);
790
791 Py_RETURN_NONE;
792}
793
794static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000795element_find(ElementObject* self, PyObject* args)
796{
797 int i;
798
799 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000800 PyObject* namespaces = Py_None;
801 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000802 return NULL;
803
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000804 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000806 elementpath_obj, "find", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807 );
808
809 if (!self->extra)
810 Py_RETURN_NONE;
811
812 for (i = 0; i < self->extra->length; i++) {
813 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300814 int rc;
815 if (!Element_CheckExact(item))
816 continue;
817 Py_INCREF(item);
818 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
819 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000820 return item;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300821 Py_DECREF(item);
822 if (rc < 0 && PyErr_Occurred())
823 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000824 }
825
826 Py_RETURN_NONE;
827}
828
829static PyObject*
830element_findtext(ElementObject* self, PyObject* args)
831{
832 int i;
833
834 PyObject* tag;
835 PyObject* default_value = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000836 PyObject* namespaces = Py_None;
837 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000838 return NULL;
839
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000840 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000841 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000842 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000843 );
844
845 if (!self->extra) {
846 Py_INCREF(default_value);
847 return default_value;
848 }
849
850 for (i = 0; i < self->extra->length; i++) {
851 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300852 int rc;
853 if (!Element_CheckExact(item))
854 continue;
855 Py_INCREF(item);
856 rc = PyObject_Compare(item->tag, tag);
857 if (rc == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000858 PyObject* text = element_get_text(item);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300859 if (text == Py_None) {
860 Py_DECREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000861 return PyString_FromString("");
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300862 }
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000863 Py_XINCREF(text);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300864 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000865 return text;
866 }
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300867 Py_DECREF(item);
868 if (rc < 0 && PyErr_Occurred())
869 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000870 }
871
872 Py_INCREF(default_value);
873 return default_value;
874}
875
876static PyObject*
877element_findall(ElementObject* self, PyObject* args)
878{
879 int i;
880 PyObject* out;
881
882 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000883 PyObject* namespaces = Py_None;
884 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000885 return NULL;
886
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000887 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000888 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000889 elementpath_obj, "findall", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000890 );
891
892 out = PyList_New(0);
893 if (!out)
894 return NULL;
895
896 if (!self->extra)
897 return out;
898
899 for (i = 0; i < self->extra->length; i++) {
900 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300901 int rc;
902 if (!Element_CheckExact(item))
903 continue;
904 Py_INCREF(item);
905 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
906 if (rc == 0)
907 rc = PyList_Append(out, item);
908 Py_DECREF(item);
909 if (rc < 0 && PyErr_Occurred()) {
910 Py_DECREF(out);
911 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000912 }
913 }
914
915 return out;
916}
917
918static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000919element_iterfind(ElementObject* self, PyObject* args)
920{
921 PyObject* tag;
922 PyObject* namespaces = Py_None;
923 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
924 return NULL;
925
926 return PyObject_CallMethod(
927 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
928 );
929}
930
931static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000932element_get(ElementObject* self, PyObject* args)
933{
934 PyObject* value;
935
936 PyObject* key;
937 PyObject* default_value = Py_None;
938 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
939 return NULL;
940
941 if (!self->extra || self->extra->attrib == Py_None)
942 value = default_value;
943 else {
944 value = PyDict_GetItem(self->extra->attrib, key);
945 if (!value)
946 value = default_value;
947 }
948
949 Py_INCREF(value);
950 return value;
951}
952
953static PyObject*
954element_getchildren(ElementObject* self, PyObject* args)
955{
956 int i;
957 PyObject* list;
958
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000959 /* FIXME: report as deprecated? */
960
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000961 if (!PyArg_ParseTuple(args, ":getchildren"))
962 return NULL;
963
964 if (!self->extra)
965 return PyList_New(0);
966
967 list = PyList_New(self->extra->length);
968 if (!list)
969 return NULL;
970
971 for (i = 0; i < self->extra->length; i++) {
972 PyObject* item = self->extra->children[i];
973 Py_INCREF(item);
974 PyList_SET_ITEM(list, i, item);
975 }
976
977 return list;
978}
979
980static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000981element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000982{
983 PyObject* result;
984
985 PyObject* tag = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000986 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000987 return NULL;
988
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000989 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000990 PyErr_SetString(
991 PyExc_RuntimeError,
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000992 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000993 );
994 return NULL;
995 }
996
997 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000998 if (!args)
999 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001000
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001001 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1002 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1003
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001004 result = PyObject_CallObject(elementtree_iter_obj, args);
1005
1006 Py_DECREF(args);
1007
1008 return result;
1009}
1010
1011
1012static PyObject*
1013element_itertext(ElementObject* self, PyObject* args)
1014{
1015 PyObject* result;
1016
1017 if (!PyArg_ParseTuple(args, ":itertext"))
1018 return NULL;
1019
1020 if (!elementtree_itertext_obj) {
1021 PyErr_SetString(
1022 PyExc_RuntimeError,
1023 "itertext helper not found"
1024 );
1025 return NULL;
1026 }
1027
1028 args = PyTuple_New(1);
1029 if (!args)
1030 return NULL;
1031
1032 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1033
1034 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001035
1036 Py_DECREF(args);
1037
1038 return result;
1039}
1040
1041static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001042element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001043{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001044 ElementObject* self = (ElementObject*) self_;
1045
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001046 if (!self->extra || index < 0 || index >= self->extra->length) {
1047 PyErr_SetString(
1048 PyExc_IndexError,
1049 "child index out of range"
1050 );
1051 return NULL;
1052 }
1053
1054 Py_INCREF(self->extra->children[index]);
1055 return self->extra->children[index];
1056}
1057
1058static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001059element_insert(ElementObject* self, PyObject* args)
1060{
1061 int i;
1062
1063 int index;
1064 PyObject* element;
1065 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1066 &Element_Type, &element))
1067 return NULL;
1068
1069 if (!self->extra)
1070 element_new_extra(self, NULL);
1071
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001072 if (index < 0) {
1073 index += self->extra->length;
1074 if (index < 0)
1075 index = 0;
1076 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001077 if (index > self->extra->length)
1078 index = self->extra->length;
1079
1080 if (element_resize(self, 1) < 0)
1081 return NULL;
1082
1083 for (i = self->extra->length; i > index; i--)
1084 self->extra->children[i] = self->extra->children[i-1];
1085
1086 Py_INCREF(element);
1087 self->extra->children[index] = element;
1088
1089 self->extra->length++;
1090
1091 Py_RETURN_NONE;
1092}
1093
1094static PyObject*
1095element_items(ElementObject* self, PyObject* args)
1096{
1097 if (!PyArg_ParseTuple(args, ":items"))
1098 return NULL;
1099
1100 if (!self->extra || self->extra->attrib == Py_None)
1101 return PyList_New(0);
1102
1103 return PyDict_Items(self->extra->attrib);
1104}
1105
1106static PyObject*
1107element_keys(ElementObject* self, PyObject* args)
1108{
1109 if (!PyArg_ParseTuple(args, ":keys"))
1110 return NULL;
1111
1112 if (!self->extra || self->extra->attrib == Py_None)
1113 return PyList_New(0);
1114
1115 return PyDict_Keys(self->extra->attrib);
1116}
1117
Martin v. Löwis18e16552006-02-15 17:27:45 +00001118static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001119element_length(ElementObject* self)
1120{
1121 if (!self->extra)
1122 return 0;
1123
1124 return self->extra->length;
1125}
1126
1127static PyObject*
1128element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1129{
1130 PyObject* elem;
1131
1132 PyObject* tag;
1133 PyObject* attrib;
1134 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1135 return NULL;
1136
1137 attrib = PyDict_Copy(attrib);
1138 if (!attrib)
1139 return NULL;
1140
1141 elem = element_new(tag, attrib);
1142
1143 Py_DECREF(attrib);
1144
1145 return elem;
1146}
1147
1148static PyObject*
1149element_reduce(ElementObject* self, PyObject* args)
1150{
1151 if (!PyArg_ParseTuple(args, ":__reduce__"))
1152 return NULL;
1153
1154 /* Hack alert: This method is used to work around a __copy__
1155 problem on certain 2.3 and 2.4 versions. To save time and
1156 simplify the code, we create the copy in here, and use a dummy
1157 copyelement helper to trick the copy module into doing the
1158 right thing. */
1159
1160 if (!elementtree_copyelement_obj) {
1161 PyErr_SetString(
1162 PyExc_RuntimeError,
1163 "copyelement helper not found"
1164 );
1165 return NULL;
1166 }
1167
1168 return Py_BuildValue(
1169 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1170 );
1171}
1172
1173static PyObject*
1174element_remove(ElementObject* self, PyObject* args)
1175{
1176 int i;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001177 int rc;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178 PyObject* element;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001179 PyObject* found;
1180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001181 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1182 return NULL;
1183
1184 if (!self->extra) {
1185 /* element has no children, so raise exception */
1186 PyErr_SetString(
1187 PyExc_ValueError,
1188 "list.remove(x): x not in list"
1189 );
1190 return NULL;
1191 }
1192
1193 for (i = 0; i < self->extra->length; i++) {
1194 if (self->extra->children[i] == element)
1195 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001196 rc = PyObject_Compare(self->extra->children[i], element);
1197 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001198 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001199 if (rc < 0 && PyErr_Occurred())
1200 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001201 }
1202
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001203 if (i >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001204 /* element is not in children, so raise exception */
1205 PyErr_SetString(
1206 PyExc_ValueError,
1207 "list.remove(x): x not in list"
1208 );
1209 return NULL;
1210 }
1211
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001212 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001213
1214 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001215 for (; i < self->extra->length; i++)
1216 self->extra->children[i] = self->extra->children[i+1];
1217
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001218 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219 Py_RETURN_NONE;
1220}
1221
1222static PyObject*
1223element_repr(ElementObject* self)
1224{
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001225 int status;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001227 if (self->tag == NULL)
1228 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001229
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001230 status = Py_ReprEnter((PyObject *)self);
1231 if (status == 0) {
1232 PyObject *repr, *tag;
1233 tag = PyObject_Repr(self->tag);
1234 if (!tag)
1235 return NULL;
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001236
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001237 repr = PyString_FromFormat("<Element %s at %p>",
1238 PyString_AS_STRING(tag), self);
1239 Py_DECREF(tag);
1240 return repr;
1241 }
1242 if (status > 0)
1243 PyErr_Format(PyExc_RuntimeError,
1244 "reentrant call inside %s.__repr__",
1245 Py_TYPE(self)->tp_name);
1246 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001247}
1248
1249static PyObject*
1250element_set(ElementObject* self, PyObject* args)
1251{
1252 PyObject* attrib;
1253
1254 PyObject* key;
1255 PyObject* value;
1256 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1257 return NULL;
1258
1259 if (!self->extra)
1260 element_new_extra(self, NULL);
1261
1262 attrib = element_get_attrib(self);
1263 if (!attrib)
1264 return NULL;
1265
1266 if (PyDict_SetItem(attrib, key, value) < 0)
1267 return NULL;
1268
1269 Py_RETURN_NONE;
1270}
1271
1272static int
Serhiy Storchakab5b76c32015-11-26 11:21:47 +02001273element_setitem(PyObject* self_, Py_ssize_t index_, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001274{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001275 ElementObject* self = (ElementObject*) self_;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001276 int i, index;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277 PyObject* old;
1278
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001279 if (!self->extra || index_ < 0 || index_ >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001280 PyErr_SetString(
1281 PyExc_IndexError,
1282 "child assignment index out of range");
1283 return -1;
1284 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001285 index = (int)index_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001286
1287 old = self->extra->children[index];
1288
1289 if (item) {
1290 Py_INCREF(item);
1291 self->extra->children[index] = item;
1292 } else {
1293 self->extra->length--;
1294 for (i = index; i < self->extra->length; i++)
1295 self->extra->children[i] = self->extra->children[i+1];
1296 }
1297
1298 Py_DECREF(old);
1299
1300 return 0;
1301}
1302
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001303static PyObject*
1304element_subscr(PyObject* self_, PyObject* item)
1305{
1306 ElementObject* self = (ElementObject*) self_;
1307
1308#if (PY_VERSION_HEX < 0x02050000)
1309 if (PyInt_Check(item) || PyLong_Check(item)) {
1310 long i = PyInt_AsLong(item);
1311#else
1312 if (PyIndex_Check(item)) {
1313 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1314#endif
1315
1316 if (i == -1 && PyErr_Occurred()) {
1317 return NULL;
1318 }
1319 if (i < 0 && self->extra)
1320 i += self->extra->length;
1321 return element_getitem(self_, i);
1322 }
1323 else if (PySlice_Check(item)) {
1324 Py_ssize_t start, stop, step, slicelen, cur, i;
1325 PyObject* list;
1326
1327 if (!self->extra)
1328 return PyList_New(0);
1329
1330 if (PySlice_GetIndicesEx((PySliceObject *)item,
1331 self->extra->length,
1332 &start, &stop, &step, &slicelen) < 0) {
1333 return NULL;
1334 }
1335
1336 if (slicelen <= 0)
1337 return PyList_New(0);
1338 else {
1339 list = PyList_New(slicelen);
1340 if (!list)
1341 return NULL;
1342
1343 for (cur = start, i = 0; i < slicelen;
1344 cur += step, i++) {
1345 PyObject* item = self->extra->children[cur];
1346 Py_INCREF(item);
1347 PyList_SET_ITEM(list, i, item);
1348 }
1349
1350 return list;
1351 }
1352 }
1353 else {
1354 PyErr_SetString(PyExc_TypeError,
1355 "element indices must be integers");
1356 return NULL;
1357 }
1358}
1359
1360static int
1361element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1362{
1363 ElementObject* self = (ElementObject*) self_;
1364
1365#if (PY_VERSION_HEX < 0x02050000)
1366 if (PyInt_Check(item) || PyLong_Check(item)) {
1367 long i = PyInt_AsLong(item);
1368#else
1369 if (PyIndex_Check(item)) {
1370 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1371#endif
1372
1373 if (i == -1 && PyErr_Occurred()) {
1374 return -1;
1375 }
1376 if (i < 0 && self->extra)
1377 i += self->extra->length;
1378 return element_setitem(self_, i, value);
1379 }
1380 else if (PySlice_Check(item)) {
1381 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1382
1383 PyObject* recycle = NULL;
1384 PyObject* seq = NULL;
1385
1386 if (!self->extra)
1387 element_new_extra(self, NULL);
1388
1389 if (PySlice_GetIndicesEx((PySliceObject *)item,
1390 self->extra->length,
1391 &start, &stop, &step, &slicelen) < 0) {
1392 return -1;
1393 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001394 assert(slicelen <= self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001395
1396 if (value == NULL)
1397 newlen = 0;
1398 else {
1399 seq = PySequence_Fast(value, "");
1400 if (!seq) {
1401 PyErr_Format(
1402 PyExc_TypeError,
1403 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1404 );
1405 return -1;
1406 }
1407 newlen = PySequence_Size(seq);
1408 }
1409
1410 if (step != 1 && newlen != slicelen)
1411 {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001412 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001413 PyErr_Format(PyExc_ValueError,
1414#if (PY_VERSION_HEX < 0x02050000)
1415 "attempt to assign sequence of size %d "
1416 "to extended slice of size %d",
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001417 (int)newlen, (int)slicelen
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001418#else
1419 "attempt to assign sequence of size %zd "
1420 "to extended slice of size %zd",
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001421 newlen, slicelen
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001422#endif
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001423 );
1424 return -1;
1425 }
1426
1427
1428 /* Resize before creating the recycle bin, to prevent refleaks. */
1429 if (newlen > slicelen) {
1430 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001431 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001432 return -1;
1433 }
1434 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001435 assert(newlen - slicelen <= INT_MAX - self->extra->length);
1436 assert(newlen - slicelen >= -self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001437
1438 if (slicelen > 0) {
1439 /* to avoid recursive calls to this method (via decref), move
1440 old items to the recycle bin here, and get rid of them when
1441 we're done modifying the element */
1442 recycle = PyList_New(slicelen);
1443 if (!recycle) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001444 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001445 return -1;
1446 }
1447 for (cur = start, i = 0; i < slicelen;
1448 cur += step, i++)
1449 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1450 }
1451
1452 if (newlen < slicelen) {
1453 /* delete slice */
1454 for (i = stop; i < self->extra->length; i++)
1455 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1456 } else if (newlen > slicelen) {
1457 /* insert slice */
1458 for (i = self->extra->length-1; i >= stop; i--)
1459 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1460 }
1461
1462 /* replace the slice */
1463 for (cur = start, i = 0; i < newlen;
1464 cur += step, i++) {
1465 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1466 Py_INCREF(element);
1467 self->extra->children[cur] = element;
1468 }
1469
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001470 self->extra->length += (int)(newlen - slicelen);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001471
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001472 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001473
1474 /* discard the recycle bin, and everything in it */
1475 Py_XDECREF(recycle);
1476
1477 return 0;
1478 }
1479 else {
1480 PyErr_SetString(PyExc_TypeError,
1481 "element indices must be integers");
1482 return -1;
1483 }
1484}
1485
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001486static PyMethodDef element_methods[] = {
1487
1488 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1489
1490 {"get", (PyCFunction) element_get, METH_VARARGS},
1491 {"set", (PyCFunction) element_set, METH_VARARGS},
1492
1493 {"find", (PyCFunction) element_find, METH_VARARGS},
1494 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1495 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1496
1497 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001498 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001499 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1500 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1501
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001502 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1503 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1504 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1505
1506 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001507 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1508
1509 {"items", (PyCFunction) element_items, METH_VARARGS},
1510 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1511
1512 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1513
1514 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1515 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1516
1517 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1518 C objects correctly, so we have to fake it using a __reduce__-
1519 based hack (see the element_reduce implementation above for
1520 details). */
1521
1522 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1523 using a runtime test to figure out if we need to fake things
1524 or now (see the init code below). The following entry is
1525 enabled only if the hack is needed. */
1526
1527 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1528
1529 {NULL, NULL}
1530};
1531
1532static PyObject*
1533element_getattr(ElementObject* self, char* name)
1534{
1535 PyObject* res;
1536
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001537 /* handle common attributes first */
1538 if (strcmp(name, "tag") == 0) {
1539 res = self->tag;
1540 Py_INCREF(res);
1541 return res;
1542 } else if (strcmp(name, "text") == 0) {
1543 res = element_get_text(self);
1544 Py_INCREF(res);
1545 return res;
1546 }
1547
1548 /* methods */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001549 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1550 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001551 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001552
1553 PyErr_Clear();
1554
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001555 /* less common attributes */
1556 if (strcmp(name, "tail") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001557 res = element_get_tail(self);
1558 } else if (strcmp(name, "attrib") == 0) {
1559 if (!self->extra)
1560 element_new_extra(self, NULL);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001561 res = element_get_attrib(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001562 } else {
1563 PyErr_SetString(PyExc_AttributeError, name);
1564 return NULL;
1565 }
1566
1567 if (!res)
1568 return NULL;
1569
1570 Py_INCREF(res);
1571 return res;
1572}
1573
1574static int
1575element_setattr(ElementObject* self, const char* name, PyObject* value)
1576{
1577 if (value == NULL) {
1578 PyErr_SetString(
1579 PyExc_AttributeError,
1580 "can't delete element attributes"
1581 );
1582 return -1;
1583 }
1584
1585 if (strcmp(name, "tag") == 0) {
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001586 Py_INCREF(value);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001587 Py_SETREF(self->tag, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001588 } else if (strcmp(name, "text") == 0) {
1589 Py_DECREF(JOIN_OBJ(self->text));
1590 self->text = value;
1591 Py_INCREF(self->text);
1592 } else if (strcmp(name, "tail") == 0) {
1593 Py_DECREF(JOIN_OBJ(self->tail));
1594 self->tail = value;
1595 Py_INCREF(self->tail);
1596 } else if (strcmp(name, "attrib") == 0) {
1597 if (!self->extra)
1598 element_new_extra(self, NULL);
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001599 Py_INCREF(value);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001600 Py_SETREF(self->extra->attrib, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001601 } else {
1602 PyErr_SetString(PyExc_AttributeError, name);
1603 return -1;
1604 }
1605
1606 return 0;
1607}
1608
1609static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001610 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001611 0, /* sq_concat */
1612 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001613 element_getitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001614 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001615 element_setitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001616 0,
1617};
1618
1619static PyMappingMethods element_as_mapping = {
1620 (lenfunc) element_length,
1621 (binaryfunc) element_subscr,
1622 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001623};
1624
1625statichere PyTypeObject Element_Type = {
1626 PyObject_HEAD_INIT(NULL)
1627 0, "Element", sizeof(ElementObject), 0,
1628 /* methods */
1629 (destructor)element_dealloc, /* tp_dealloc */
1630 0, /* tp_print */
1631 (getattrfunc)element_getattr, /* tp_getattr */
1632 (setattrfunc)element_setattr, /* tp_setattr */
1633 0, /* tp_compare */
1634 (reprfunc)element_repr, /* tp_repr */
1635 0, /* tp_as_number */
1636 &element_as_sequence, /* tp_as_sequence */
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001637 &element_as_mapping, /* tp_as_mapping */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001638};
1639
1640/* ==================================================================== */
1641/* the tree builder type */
1642
1643typedef struct {
1644 PyObject_HEAD
1645
1646 PyObject* root; /* root node (first created node) */
1647
1648 ElementObject* this; /* current node */
1649 ElementObject* last; /* most recently created node */
1650
1651 PyObject* data; /* data collector (string or list), or NULL */
1652
1653 PyObject* stack; /* element stack */
Neal Norwitzc7074382006-06-12 02:06:17 +00001654 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001655
1656 /* element tracing */
1657 PyObject* events; /* list of events, or NULL if not collecting */
1658 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1659 PyObject* end_event_obj;
1660 PyObject* start_ns_event_obj;
1661 PyObject* end_ns_event_obj;
1662
1663} TreeBuilderObject;
1664
1665staticforward PyTypeObject TreeBuilder_Type;
1666
Christian Heimese93237d2007-12-19 02:37:44 +00001667#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001668
1669/* -------------------------------------------------------------------- */
1670/* constructor and destructor */
1671
1672LOCAL(PyObject*)
1673treebuilder_new(void)
1674{
1675 TreeBuilderObject* self;
1676
1677 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1678 if (self == NULL)
1679 return NULL;
1680
1681 self->root = NULL;
1682
1683 Py_INCREF(Py_None);
1684 self->this = (ElementObject*) Py_None;
1685
1686 Py_INCREF(Py_None);
1687 self->last = (ElementObject*) Py_None;
1688
1689 self->data = NULL;
1690
1691 self->stack = PyList_New(20);
1692 self->index = 0;
1693
1694 self->events = NULL;
1695 self->start_event_obj = self->end_event_obj = NULL;
1696 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1697
1698 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1699
1700 return (PyObject*) self;
1701}
1702
1703static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00001704treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001705{
1706 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1707 return NULL;
1708
1709 return treebuilder_new();
1710}
1711
1712static void
1713treebuilder_dealloc(TreeBuilderObject* self)
1714{
1715 Py_XDECREF(self->end_ns_event_obj);
1716 Py_XDECREF(self->start_ns_event_obj);
1717 Py_XDECREF(self->end_event_obj);
1718 Py_XDECREF(self->start_event_obj);
1719 Py_XDECREF(self->events);
1720 Py_DECREF(self->stack);
1721 Py_XDECREF(self->data);
1722 Py_DECREF(self->last);
1723 Py_DECREF(self->this);
1724 Py_XDECREF(self->root);
1725
1726 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1727
1728 PyObject_Del(self);
1729}
1730
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001731LOCAL(int)
1732treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
1733 PyObject *node)
1734{
1735 if (action != NULL) {
1736 PyObject *res = PyTuple_Pack(2, action, node);
1737 if (res == NULL)
1738 return -1;
1739 if (PyList_Append(self->events, res) < 0) {
1740 Py_DECREF(res);
1741 return -1;
1742 }
1743 Py_DECREF(res);
1744 }
1745 return 0;
1746}
1747
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001748/* -------------------------------------------------------------------- */
1749/* handlers */
1750
1751LOCAL(PyObject*)
1752treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1753 PyObject* standalone)
1754{
1755 Py_RETURN_NONE;
1756}
1757
1758LOCAL(PyObject*)
1759treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1760 PyObject* attrib)
1761{
1762 PyObject* node;
1763 PyObject* this;
1764
1765 if (self->data) {
1766 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001767 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001768 self->last->text = JOIN_SET(
1769 self->data, PyList_CheckExact(self->data)
1770 );
1771 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001772 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001773 self->last->tail = JOIN_SET(
1774 self->data, PyList_CheckExact(self->data)
1775 );
1776 }
1777 self->data = NULL;
1778 }
1779
1780 node = element_new(tag, attrib);
1781 if (!node)
1782 return NULL;
1783
1784 this = (PyObject*) self->this;
1785
1786 if (this != Py_None) {
1787 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001788 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001789 } else {
1790 if (self->root) {
1791 PyErr_SetString(
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001792 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001793 "multiple elements on top level"
1794 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001795 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001796 }
1797 Py_INCREF(node);
1798 self->root = node;
1799 }
1800
1801 if (self->index < PyList_GET_SIZE(self->stack)) {
1802 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001803 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001804 Py_INCREF(this);
1805 } else {
1806 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001807 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001808 }
1809 self->index++;
1810
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001811 Py_INCREF(node);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001812 Py_SETREF(self->this, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001813
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001814 Py_INCREF(node);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001815 Py_SETREF(self->last, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001816
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001817 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
1818 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001819
1820 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001821
1822 error:
1823 Py_DECREF(node);
1824 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001825}
1826
1827LOCAL(PyObject*)
1828treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1829{
1830 if (!self->data) {
Fredrik Lundhdc075b92006-08-16 16:47:07 +00001831 if (self->last == (ElementObject*) Py_None) {
1832 /* ignore calls to data before the first call to start */
1833 Py_RETURN_NONE;
1834 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001835 /* store the first item as is */
1836 Py_INCREF(data); self->data = data;
1837 } else {
1838 /* more than one item; use a list to collect items */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001839 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1840 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001841 /* expat often generates single character data sections; handle
1842 the most common case by resizing the existing string... */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001843 Py_ssize_t size = PyString_GET_SIZE(self->data);
1844 if (_PyString_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001845 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001846 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001847 } else if (PyList_CheckExact(self->data)) {
1848 if (PyList_Append(self->data, data) < 0)
1849 return NULL;
1850 } else {
1851 PyObject* list = PyList_New(2);
1852 if (!list)
1853 return NULL;
1854 PyList_SET_ITEM(list, 0, self->data);
1855 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1856 self->data = list;
1857 }
1858 }
1859
1860 Py_RETURN_NONE;
1861}
1862
1863LOCAL(PyObject*)
1864treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1865{
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001866 ElementObject *item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001867
1868 if (self->data) {
1869 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001870 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001871 self->last->text = JOIN_SET(
1872 self->data, PyList_CheckExact(self->data)
1873 );
1874 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001875 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001876 self->last->tail = JOIN_SET(
1877 self->data, PyList_CheckExact(self->data)
1878 );
1879 }
1880 self->data = NULL;
1881 }
1882
1883 if (self->index == 0) {
1884 PyErr_SetString(
1885 PyExc_IndexError,
1886 "pop from empty stack"
1887 );
1888 return NULL;
1889 }
1890
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001891 item = self->last;
1892 self->last = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001893 self->index--;
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001894 self->this = (ElementObject *) PyList_GET_ITEM(self->stack, self->index);
1895 Py_INCREF(self->this);
1896 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001897
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001898 if (treebuilder_append_event(self, self->end_event_obj, (PyObject*)self->last) < 0)
1899 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001900
1901 Py_INCREF(self->last);
1902 return (PyObject*) self->last;
1903}
1904
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001905/* -------------------------------------------------------------------- */
1906/* methods (in alphabetical order) */
1907
1908static PyObject*
1909treebuilder_data(TreeBuilderObject* self, PyObject* args)
1910{
1911 PyObject* data;
1912 if (!PyArg_ParseTuple(args, "O:data", &data))
1913 return NULL;
1914
1915 return treebuilder_handle_data(self, data);
1916}
1917
1918static PyObject*
1919treebuilder_end(TreeBuilderObject* self, PyObject* args)
1920{
1921 PyObject* tag;
1922 if (!PyArg_ParseTuple(args, "O:end", &tag))
1923 return NULL;
1924
1925 return treebuilder_handle_end(self, tag);
1926}
1927
1928LOCAL(PyObject*)
1929treebuilder_done(TreeBuilderObject* self)
1930{
1931 PyObject* res;
1932
1933 /* FIXME: check stack size? */
1934
1935 if (self->root)
1936 res = self->root;
1937 else
1938 res = Py_None;
1939
1940 Py_INCREF(res);
1941 return res;
1942}
1943
1944static PyObject*
1945treebuilder_close(TreeBuilderObject* self, PyObject* args)
1946{
1947 if (!PyArg_ParseTuple(args, ":close"))
1948 return NULL;
1949
1950 return treebuilder_done(self);
1951}
1952
1953static PyObject*
1954treebuilder_start(TreeBuilderObject* self, PyObject* args)
1955{
1956 PyObject* tag;
1957 PyObject* attrib = Py_None;
1958 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1959 return NULL;
1960
1961 return treebuilder_handle_start(self, tag, attrib);
1962}
1963
1964static PyObject*
1965treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1966{
1967 PyObject* encoding;
1968 PyObject* standalone;
1969 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1970 return NULL;
1971
1972 return treebuilder_handle_xml(self, encoding, standalone);
1973}
1974
1975static PyMethodDef treebuilder_methods[] = {
1976 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1977 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1978 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1979 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1980 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1981 {NULL, NULL}
1982};
1983
1984static PyObject*
1985treebuilder_getattr(TreeBuilderObject* self, char* name)
1986{
1987 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1988}
1989
1990statichere PyTypeObject TreeBuilder_Type = {
1991 PyObject_HEAD_INIT(NULL)
1992 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
1993 /* methods */
1994 (destructor)treebuilder_dealloc, /* tp_dealloc */
1995 0, /* tp_print */
1996 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1997};
1998
1999/* ==================================================================== */
2000/* the expat interface */
2001
2002#if defined(USE_EXPAT)
2003
2004#include "expat.h"
2005
2006#if defined(USE_PYEXPAT_CAPI)
2007#include "pyexpat.h"
2008static struct PyExpat_CAPI* expat_capi;
2009#define EXPAT(func) (expat_capi->func)
2010#else
2011#define EXPAT(func) (XML_##func)
2012#endif
2013
2014typedef struct {
2015 PyObject_HEAD
2016
2017 XML_Parser parser;
2018
2019 PyObject* target;
2020 PyObject* entity;
2021
2022 PyObject* names;
2023
2024 PyObject* handle_xml;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002025
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002026 PyObject* handle_start;
2027 PyObject* handle_data;
2028 PyObject* handle_end;
2029
2030 PyObject* handle_comment;
2031 PyObject* handle_pi;
2032
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002033 PyObject* handle_close;
2034
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002035} XMLParserObject;
2036
2037staticforward PyTypeObject XMLParser_Type;
2038
2039/* helpers */
2040
#if defined(Py_USING_UNICODE)
LOCAL(int)
checkstring(const char* string, int size)
{
    /* return 1 if any of the first 'size' bytes has its high bit set
       (i.e. the buffer is not plain 7-bit ascii), otherwise 0 */
    int pos = 0;

    while (pos < size) {
        if (string[pos] & 0x80)
            return 1;
        pos++;
    }

    return 0;
}
#endif
2055
2056LOCAL(PyObject*)
2057makestring(const char* string, int size)
2058{
2059 /* convert a UTF-8 string to either a 7-bit ascii string or a
2060 Unicode string */
2061
2062#if defined(Py_USING_UNICODE)
2063 if (checkstring(string, size))
2064 return PyUnicode_DecodeUTF8(string, size, "strict");
2065#endif
2066
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002067 return PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002068}
2069
2070LOCAL(PyObject*)
2071makeuniversal(XMLParserObject* self, const char* string)
2072{
2073 /* convert a UTF-8 tag/attribute name from the expat parser
2074 to a universal name string */
2075
2076 int size = strlen(string);
2077 PyObject* key;
2078 PyObject* value;
2079
2080 /* look the 'raw' name up in the names dictionary */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002081 key = PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002082 if (!key)
2083 return NULL;
2084
2085 value = PyDict_GetItem(self->names, key);
2086
2087 if (value) {
2088 Py_INCREF(value);
2089 } else {
2090 /* new name. convert to universal name, and decode as
2091 necessary */
2092
2093 PyObject* tag;
2094 char* p;
2095 int i;
2096
2097 /* look for namespace separator */
2098 for (i = 0; i < size; i++)
2099 if (string[i] == '}')
2100 break;
2101 if (i != size) {
2102 /* convert to universal name */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002103 tag = PyString_FromStringAndSize(NULL, size+1);
2104 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002105 p[0] = '{';
2106 memcpy(p+1, string, size);
2107 size++;
2108 } else {
2109 /* plain name; use key as tag */
2110 Py_INCREF(key);
2111 tag = key;
2112 }
2113
2114 /* decode universal name */
2115#if defined(Py_USING_UNICODE)
2116 /* inline makestring, to avoid duplicating the source string if
Martin Panter6a8163a2016-04-15 02:14:19 +00002117 it's not a utf-8 string */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002118 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002119 if (checkstring(p, size)) {
2120 value = PyUnicode_DecodeUTF8(p, size, "strict");
2121 Py_DECREF(tag);
2122 if (!value) {
2123 Py_DECREF(key);
2124 return NULL;
2125 }
2126 } else
2127#endif
2128 value = tag; /* use tag as is */
2129
2130 /* add to names dictionary */
2131 if (PyDict_SetItem(self->names, key, value) < 0) {
2132 Py_DECREF(key);
2133 Py_DECREF(value);
2134 return NULL;
2135 }
2136 }
2137
2138 Py_DECREF(key);
2139 return value;
2140}
2141
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002142static void
2143expat_set_error(const char* message, int line, int column)
2144{
2145 PyObject *error;
2146 PyObject *position;
2147 char buffer[256];
2148
2149 sprintf(buffer, "%s: line %d, column %d", message, line, column);
2150
2151 error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
2152 if (!error)
2153 return;
2154
2155 /* add position attribute */
2156 position = Py_BuildValue("(ii)", line, column);
2157 if (!position) {
2158 Py_DECREF(error);
2159 return;
2160 }
2161 if (PyObject_SetAttrString(error, "position", position) == -1) {
2162 Py_DECREF(error);
2163 Py_DECREF(position);
2164 return;
2165 }
2166 Py_DECREF(position);
2167
2168 PyErr_SetObject(elementtree_parseerror_obj, error);
2169 Py_DECREF(error);
2170}
2171
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002172/* -------------------------------------------------------------------- */
2173/* handlers */
2174
2175static void
2176expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2177 int data_len)
2178{
2179 PyObject* key;
2180 PyObject* value;
2181 PyObject* res;
2182
2183 if (data_len < 2 || data_in[0] != '&')
2184 return;
2185
2186 key = makestring(data_in + 1, data_len - 2);
2187 if (!key)
2188 return;
2189
2190 value = PyDict_GetItem(self->entity, key);
2191
2192 if (value) {
2193 if (TreeBuilder_CheckExact(self->target))
2194 res = treebuilder_handle_data(
2195 (TreeBuilderObject*) self->target, value
2196 );
2197 else if (self->handle_data)
2198 res = PyObject_CallFunction(self->handle_data, "O", value);
2199 else
2200 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002201 Py_XDECREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002202 } else if (!PyErr_Occurred()) {
2203 /* Report the first error, not the last */
2204 char message[128];
2205 sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key));
2206 expat_set_error(
2207 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002208 EXPAT(GetErrorLineNumber)(self->parser),
2209 EXPAT(GetErrorColumnNumber)(self->parser)
2210 );
2211 }
2212
2213 Py_DECREF(key);
2214}
2215
2216static void
2217expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2218 const XML_Char **attrib_in)
2219{
2220 PyObject* res;
2221 PyObject* tag;
2222 PyObject* attrib;
2223 int ok;
2224
2225 /* tag name */
2226 tag = makeuniversal(self, tag_in);
2227 if (!tag)
2228 return; /* parser will look for errors */
2229
2230 /* attributes */
2231 if (attrib_in[0]) {
2232 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002233 if (!attrib) {
2234 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002235 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002236 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002237 while (attrib_in[0] && attrib_in[1]) {
2238 PyObject* key = makeuniversal(self, attrib_in[0]);
2239 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2240 if (!key || !value) {
2241 Py_XDECREF(value);
2242 Py_XDECREF(key);
2243 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002244 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002245 return;
2246 }
2247 ok = PyDict_SetItem(attrib, key, value);
2248 Py_DECREF(value);
2249 Py_DECREF(key);
2250 if (ok < 0) {
2251 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002252 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002253 return;
2254 }
2255 attrib_in += 2;
2256 }
2257 } else {
2258 Py_INCREF(Py_None);
2259 attrib = Py_None;
2260 }
2261
2262 if (TreeBuilder_CheckExact(self->target))
2263 /* shortcut */
2264 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2265 tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002266 else if (self->handle_start) {
2267 if (attrib == Py_None) {
2268 Py_DECREF(attrib);
2269 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002270 if (!attrib) {
2271 Py_DECREF(tag);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002272 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002273 }
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002274 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002275 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002276 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002277 res = NULL;
2278
2279 Py_DECREF(tag);
2280 Py_DECREF(attrib);
2281
2282 Py_XDECREF(res);
2283}
2284
2285static void
2286expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2287 int data_len)
2288{
2289 PyObject* data;
2290 PyObject* res;
2291
2292 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002293 if (!data)
2294 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002295
2296 if (TreeBuilder_CheckExact(self->target))
2297 /* shortcut */
2298 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2299 else if (self->handle_data)
2300 res = PyObject_CallFunction(self->handle_data, "O", data);
2301 else
2302 res = NULL;
2303
2304 Py_DECREF(data);
2305
2306 Py_XDECREF(res);
2307}
2308
2309static void
2310expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2311{
2312 PyObject* tag;
2313 PyObject* res = NULL;
2314
2315 if (TreeBuilder_CheckExact(self->target))
2316 /* shortcut */
2317 /* the standard tree builder doesn't look at the end tag */
2318 res = treebuilder_handle_end(
2319 (TreeBuilderObject*) self->target, Py_None
2320 );
2321 else if (self->handle_end) {
2322 tag = makeuniversal(self, tag_in);
2323 if (tag) {
2324 res = PyObject_CallFunction(self->handle_end, "O", tag);
2325 Py_DECREF(tag);
2326 }
2327 }
2328
2329 Py_XDECREF(res);
2330}
2331
2332static void
2333expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2334 const XML_Char *uri)
2335{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002336 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2337 PyObject *parcel;
2338 PyObject *sprefix = NULL;
2339 PyObject *suri = NULL;
2340
2341 if (PyErr_Occurred())
2342 return;
2343
2344 if (!target->events || !target->start_ns_event_obj)
2345 return;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002346
Eli Benderskyf933e082013-11-28 06:25:45 -08002347 if (uri)
Eli Bendersky71142c42013-11-28 06:37:25 -08002348 suri = makestring(uri, strlen(uri));
Eli Benderskyf933e082013-11-28 06:25:45 -08002349 else
Eli Bendersky71142c42013-11-28 06:37:25 -08002350 suri = PyString_FromStringAndSize("", 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002351 if (!suri)
2352 return;
2353
2354 if (prefix)
2355 sprefix = makestring(prefix, strlen(prefix));
2356 else
2357 sprefix = PyString_FromStringAndSize("", 0);
2358 if (!sprefix) {
2359 Py_DECREF(suri);
2360 return;
2361 }
2362
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002363 parcel = PyTuple_Pack(2, sprefix, suri);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002364 Py_DECREF(sprefix);
2365 Py_DECREF(suri);
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002366 if (!parcel)
2367 return;
2368 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
2369 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002370}
2371
2372static void
2373expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2374{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002375 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2376
2377 if (PyErr_Occurred())
2378 return;
2379
2380 if (!target->events)
2381 return;
2382
2383 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002384}
2385
2386static void
2387expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2388{
2389 PyObject* comment;
2390 PyObject* res;
2391
2392 if (self->handle_comment) {
2393 comment = makestring(comment_in, strlen(comment_in));
2394 if (comment) {
2395 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2396 Py_XDECREF(res);
2397 Py_DECREF(comment);
2398 }
2399 }
2400}
2401
2402static void
2403expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2404 const XML_Char* data_in)
2405{
2406 PyObject* target;
2407 PyObject* data;
2408 PyObject* res;
2409
2410 if (self->handle_pi) {
2411 target = makestring(target_in, strlen(target_in));
2412 data = makestring(data_in, strlen(data_in));
2413 if (target && data) {
2414 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2415 Py_XDECREF(res);
2416 Py_DECREF(data);
2417 Py_DECREF(target);
2418 } else {
2419 Py_XDECREF(data);
2420 Py_XDECREF(target);
2421 }
2422 }
2423}
2424
#if defined(Py_USING_UNICODE)
/* expat callback for encodings it does not know: decode all 256 byte
   values with the Python codec called 'name' and use the result as the
   byte-to-character map.  bytes that decode to the replacement character
   are mapped to -1.  codecs that do not produce exactly one character
   per byte are rejected with ValueError. */
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    unsigned char bytes[256];
    Py_UNICODE* chars;
    PyObject* decoded;
    int n;

    memset(info, 0, sizeof(XML_Encoding));

    /* one byte for every possible value */
    n = 0;
    while (n < 256) {
        bytes[n] = n;
        n++;
    }

    decoded = PyUnicode_Decode((char*) bytes, 256, name, "replace");
    if (decoded == NULL)
        return XML_STATUS_ERROR;

    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        PyErr_SetString(PyExc_ValueError,
                        "multi-byte encodings are not supported");
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (n = 0; n < 256; n++) {
        if (chars[n] == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[n] = -1;
        else
            info->map[n] = chars[n];
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2465
2466/* -------------------------------------------------------------------- */
2467/* constructor and destructor */
2468
2469static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00002470xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002471{
2472 XMLParserObject* self;
2473 /* FIXME: does this need to be static? */
2474 static XML_Memory_Handling_Suite memory_handler;
2475
2476 PyObject* target = NULL;
2477 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002478 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002479 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2480 &target, &encoding))
2481 return NULL;
2482
2483#if defined(USE_PYEXPAT_CAPI)
2484 if (!expat_capi) {
2485 PyErr_SetString(
2486 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2487 );
2488 return NULL;
2489 }
2490#endif
2491
2492 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2493 if (self == NULL)
2494 return NULL;
2495
2496 self->entity = PyDict_New();
2497 if (!self->entity) {
2498 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002499 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002500 }
2501
2502 self->names = PyDict_New();
2503 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002504 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002505 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002506 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002507 }
2508
2509 memory_handler.malloc_fcn = PyObject_Malloc;
2510 memory_handler.realloc_fcn = PyObject_Realloc;
2511 memory_handler.free_fcn = PyObject_Free;
2512
2513 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2514 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002515 PyObject_Del(self->names);
2516 PyObject_Del(self->entity);
2517 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002518 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002519 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002520 }
2521
2522 /* setup target handlers */
2523 if (!target) {
2524 target = treebuilder_new();
2525 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002526 EXPAT(ParserFree)(self->parser);
2527 PyObject_Del(self->names);
2528 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002529 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002530 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002531 }
2532 } else
2533 Py_INCREF(target);
2534 self->target = target;
2535
2536 self->handle_xml = PyObject_GetAttrString(target, "xml");
2537 self->handle_start = PyObject_GetAttrString(target, "start");
2538 self->handle_data = PyObject_GetAttrString(target, "data");
2539 self->handle_end = PyObject_GetAttrString(target, "end");
2540 self->handle_comment = PyObject_GetAttrString(target, "comment");
2541 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002542 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002543
2544 PyErr_Clear();
2545
2546 /* configure parser */
2547 EXPAT(SetUserData)(self->parser, self);
2548 EXPAT(SetElementHandler)(
2549 self->parser,
2550 (XML_StartElementHandler) expat_start_handler,
2551 (XML_EndElementHandler) expat_end_handler
2552 );
2553 EXPAT(SetDefaultHandlerExpand)(
2554 self->parser,
2555 (XML_DefaultHandler) expat_default_handler
2556 );
2557 EXPAT(SetCharacterDataHandler)(
2558 self->parser,
2559 (XML_CharacterDataHandler) expat_data_handler
2560 );
2561 if (self->handle_comment)
2562 EXPAT(SetCommentHandler)(
2563 self->parser,
2564 (XML_CommentHandler) expat_comment_handler
2565 );
2566 if (self->handle_pi)
2567 EXPAT(SetProcessingInstructionHandler)(
2568 self->parser,
2569 (XML_ProcessingInstructionHandler) expat_pi_handler
2570 );
2571#if defined(Py_USING_UNICODE)
2572 EXPAT(SetUnknownEncodingHandler)(
2573 self->parser,
2574 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2575 );
2576#endif
2577
2578 ALLOC(sizeof(XMLParserObject), "create expatparser");
2579
2580 return (PyObject*) self;
2581}
2582
2583static void
2584xmlparser_dealloc(XMLParserObject* self)
2585{
2586 EXPAT(ParserFree)(self->parser);
2587
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002588 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002589 Py_XDECREF(self->handle_pi);
2590 Py_XDECREF(self->handle_comment);
2591 Py_XDECREF(self->handle_end);
2592 Py_XDECREF(self->handle_data);
2593 Py_XDECREF(self->handle_start);
2594 Py_XDECREF(self->handle_xml);
2595
2596 Py_DECREF(self->target);
2597 Py_DECREF(self->entity);
2598 Py_DECREF(self->names);
2599
2600 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2601
2602 PyObject_Del(self);
2603}
2604
2605/* -------------------------------------------------------------------- */
2606/* methods (in alphabetical order) */
2607
2608LOCAL(PyObject*)
2609expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2610{
2611 int ok;
2612
2613 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2614
2615 if (PyErr_Occurred())
2616 return NULL;
2617
2618 if (!ok) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002619 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002620 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2621 EXPAT(GetErrorLineNumber)(self->parser),
2622 EXPAT(GetErrorColumnNumber)(self->parser)
2623 );
2624 return NULL;
2625 }
2626
2627 Py_RETURN_NONE;
2628}
2629
2630static PyObject*
2631xmlparser_close(XMLParserObject* self, PyObject* args)
2632{
2633 /* end feeding data to parser */
2634
2635 PyObject* res;
2636 if (!PyArg_ParseTuple(args, ":close"))
2637 return NULL;
2638
2639 res = expat_parse(self, "", 0, 1);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002640 if (!res)
2641 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002642
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002643 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002644 Py_DECREF(res);
2645 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002646 } if (self->handle_close) {
2647 Py_DECREF(res);
2648 return PyObject_CallFunction(self->handle_close, "");
2649 } else
2650 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002651}
2652
2653static PyObject*
2654xmlparser_feed(XMLParserObject* self, PyObject* args)
2655{
2656 /* feed data to parser */
2657
2658 char* data;
2659 int data_len;
2660 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2661 return NULL;
2662
2663 return expat_parse(self, data, data_len, 0);
2664}
2665
2666static PyObject*
2667xmlparser_parse(XMLParserObject* self, PyObject* args)
2668{
2669 /* (internal) parse until end of input stream */
2670
2671 PyObject* reader;
2672 PyObject* buffer;
2673 PyObject* res;
2674
2675 PyObject* fileobj;
2676 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2677 return NULL;
2678
2679 reader = PyObject_GetAttrString(fileobj, "read");
2680 if (!reader)
2681 return NULL;
2682
2683 /* read from open file object */
2684 for (;;) {
2685
2686 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2687
2688 if (!buffer) {
2689 /* read failed (e.g. due to KeyboardInterrupt) */
2690 Py_DECREF(reader);
2691 return NULL;
2692 }
2693
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002694 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002695 Py_DECREF(buffer);
2696 break;
2697 }
2698
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002699 if (PyString_GET_SIZE(buffer) > INT_MAX) {
2700 Py_DECREF(buffer);
2701 Py_DECREF(reader);
2702 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
2703 return NULL;
2704 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002705 res = expat_parse(
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002706 self, PyString_AS_STRING(buffer), (int)PyString_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707 );
2708
2709 Py_DECREF(buffer);
2710
2711 if (!res) {
2712 Py_DECREF(reader);
2713 return NULL;
2714 }
2715 Py_DECREF(res);
2716
2717 }
2718
2719 Py_DECREF(reader);
2720
2721 res = expat_parse(self, "", 0, 1);
2722
2723 if (res && TreeBuilder_CheckExact(self->target)) {
2724 Py_DECREF(res);
2725 return treebuilder_done((TreeBuilderObject*) self->target);
2726 }
2727
2728 return res;
2729}
2730
2731static PyObject*
2732xmlparser_setevents(XMLParserObject* self, PyObject* args)
2733{
2734 /* activate element event reporting */
2735
Neal Norwitzc7074382006-06-12 02:06:17 +00002736 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002737 TreeBuilderObject* target;
2738
2739 PyObject* events; /* event collector */
2740 PyObject* event_set = Py_None;
2741 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2742 &event_set))
2743 return NULL;
2744
2745 if (!TreeBuilder_CheckExact(self->target)) {
2746 PyErr_SetString(
2747 PyExc_TypeError,
2748 "event handling only supported for cElementTree.Treebuilder "
2749 "targets"
2750 );
2751 return NULL;
2752 }
2753
2754 target = (TreeBuilderObject*) self->target;
2755
2756 Py_INCREF(events);
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002757 Py_XSETREF(target->events, events);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002758
2759 /* clear out existing events */
Serhiy Storchaka98a97222014-02-09 13:14:04 +02002760 Py_CLEAR(target->start_event_obj);
2761 Py_CLEAR(target->end_event_obj);
2762 Py_CLEAR(target->start_ns_event_obj);
2763 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002764
2765 if (event_set == Py_None) {
2766 /* default is "end" only */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002767 target->end_event_obj = PyString_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002768 Py_RETURN_NONE;
2769 }
2770
2771 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2772 goto error;
2773
2774 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2775 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2776 char* event;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002777 if (!PyString_Check(item))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002778 goto error;
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002779 Py_INCREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002780 event = PyString_AS_STRING(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002781 if (strcmp(event, "start") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002782 Py_XSETREF(target->start_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002783 } else if (strcmp(event, "end") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002784 Py_XSETREF(target->end_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002785 } else if (strcmp(event, "start-ns") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002786 Py_XSETREF(target->start_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002787 EXPAT(SetNamespaceDeclHandler)(
2788 self->parser,
2789 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2790 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2791 );
2792 } else if (strcmp(event, "end-ns") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002793 Py_XSETREF(target->end_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002794 EXPAT(SetNamespaceDeclHandler)(
2795 self->parser,
2796 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2797 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2798 );
2799 } else {
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002800 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002801 PyErr_Format(
2802 PyExc_ValueError,
2803 "unknown event '%s'", event
2804 );
2805 return NULL;
2806 }
2807 }
2808
2809 Py_RETURN_NONE;
2810
2811 error:
2812 PyErr_SetString(
2813 PyExc_TypeError,
2814 "invalid event tuple"
2815 );
2816 return NULL;
2817}
2818
2819static PyMethodDef xmlparser_methods[] = {
2820 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2821 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2822 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2823 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2824 {NULL, NULL}
2825};
2826
2827static PyObject*
2828xmlparser_getattr(XMLParserObject* self, char* name)
2829{
2830 PyObject* res;
2831
2832 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2833 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002834 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002835
2836 PyErr_Clear();
2837
2838 if (strcmp(name, "entity") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002839 res = self->entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002840 else if (strcmp(name, "target") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002841 res = self->target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002842 else if (strcmp(name, "version") == 0) {
2843 char buffer[100];
2844 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2845 XML_MINOR_VERSION, XML_MICRO_VERSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002846 return PyString_FromString(buffer);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002847 } else {
2848 PyErr_SetString(PyExc_AttributeError, name);
2849 return NULL;
2850 }
2851
2852 Py_INCREF(res);
2853 return res;
2854}
2855
2856statichere PyTypeObject XMLParser_Type = {
2857 PyObject_HEAD_INIT(NULL)
2858 0, "XMLParser", sizeof(XMLParserObject), 0,
2859 /* methods */
2860 (destructor)xmlparser_dealloc, /* tp_dealloc */
2861 0, /* tp_print */
2862 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2863};
2864
2865#endif
2866
2867/* ==================================================================== */
2868/* python module interface */
2869
2870static PyMethodDef _functions[] = {
2871 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2872 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2873 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2874#if defined(USE_EXPAT)
2875 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2876 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2877#endif
2878 {NULL, NULL}
2879};
2880
2881DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002882init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002883{
2884 PyObject* m;
2885 PyObject* g;
2886 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002887
2888 /* Patch object type */
Christian Heimese93237d2007-12-19 02:37:44 +00002889 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002890#if defined(USE_EXPAT)
Christian Heimese93237d2007-12-19 02:37:44 +00002891 Py_TYPE(&XMLParser_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002892#endif
2893
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002894 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002895 if (!m)
2896 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002897
2898 /* python glue code */
2899
2900 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002901 if (!g)
2902 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002903
2904 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2905
2906 bootstrap = (
2907
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002908 "from copy import copy, deepcopy\n"
2909
2910 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002911 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002912 "except ImportError:\n"
2913 " import ElementTree\n"
2914 "ET = ElementTree\n"
2915 "del ElementTree\n"
2916
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002917 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002918
2919 "try:\n" /* check if copy works as is */
2920 " copy(cElementTree.Element('x'))\n"
2921 "except:\n"
2922 " def copyelement(elem):\n"
2923 " return elem\n"
2924
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002925 "class CommentProxy:\n"
2926 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002927 " element = cElementTree.Element(ET.Comment)\n"
2928 " element.text = text\n"
2929 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002930 " def __cmp__(self, other):\n"
2931 " return cmp(ET.Comment, other)\n"
2932 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002933
2934 "class ElementTree(ET.ElementTree):\n" /* public */
2935 " def parse(self, source, parser=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002936 " close_source = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002937 " if not hasattr(source, 'read'):\n"
2938 " source = open(source, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002939 " close_source = False\n"
2940 " try:\n"
2941 " if parser is not None:\n"
2942 " while 1:\n"
2943 " data = source.read(65536)\n"
2944 " if not data:\n"
2945 " break\n"
2946 " parser.feed(data)\n"
2947 " self._root = parser.close()\n"
2948 " else:\n"
2949 " parser = cElementTree.XMLParser()\n"
2950 " self._root = parser._parse(source)\n"
2951 " return self._root\n"
2952 " finally:\n"
2953 " if close_source:\n"
2954 " source.close()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002955 "cElementTree.ElementTree = ElementTree\n"
2956
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002957 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002958 " if tag == '*':\n"
2959 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002960 " if tag is None or node.tag == tag:\n"
2961 " yield node\n"
2962 " for node in node:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002963 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002964 " yield node\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002965
2966 "def itertext(node):\n" /* helper */
2967 " if node.text:\n"
2968 " yield node.text\n"
2969 " for e in node:\n"
2970 " for s in e.itertext():\n"
2971 " yield s\n"
2972 " if e.tail:\n"
2973 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974
2975 "def parse(source, parser=None):\n" /* public */
2976 " tree = ElementTree()\n"
2977 " tree.parse(source, parser)\n"
2978 " return tree\n"
2979 "cElementTree.parse = parse\n"
2980
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002981 "class iterparse(object):\n"
2982 " root = None\n"
2983 " def __init__(self, file, events=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002984 " self._close_file = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002985 " if not hasattr(file, 'read'):\n"
2986 " file = open(file, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002987 " self._close_file = True\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002988 " self._file = file\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002989 " self._events = []\n"
2990 " self._index = 0\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01002991 " self._error = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002992 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002993 " b = cElementTree.TreeBuilder()\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002994 " self._parser = cElementTree.XMLParser(b)\n"
2995 " self._parser._setevents(self._events, events)\n"
2996 " def next(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002997 " while 1:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002998 " try:\n"
2999 " item = self._events[self._index]\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003000 " self._index += 1\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003001 " return item\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003002 " except IndexError:\n"
3003 " pass\n"
3004 " if self._error:\n"
3005 " e = self._error\n"
3006 " self._error = None\n"
3007 " raise e\n"
3008 " if self._parser is None:\n"
3009 " self.root = self._root\n"
3010 " if self._close_file:\n"
3011 " self._file.close()\n"
3012 " raise StopIteration\n"
3013 " # load event buffer\n"
3014 " del self._events[:]\n"
3015 " self._index = 0\n"
3016 " data = self._file.read(16384)\n"
3017 " if data:\n"
3018 " try:\n"
3019 " self._parser.feed(data)\n"
3020 " except SyntaxError as exc:\n"
3021 " self._error = exc\n"
3022 " else:\n"
3023 " self._root = self._parser.close()\n"
3024 " self._parser = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003025 " def __iter__(self):\n"
3026 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003027 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003028
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003029 "class PIProxy:\n"
3030 " def __call__(self, target, text=None):\n"
3031 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003032 " element.text = target\n"
3033 " if text:\n"
3034 " element.text = element.text + ' ' + text\n"
3035 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003036 " def __cmp__(self, other):\n"
3037 " return cmp(ET.PI, other)\n"
3038 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039
3040 "def XML(text):\n" /* public */
3041 " parser = cElementTree.XMLParser()\n"
3042 " parser.feed(text)\n"
3043 " return parser.close()\n"
3044 "cElementTree.XML = cElementTree.fromstring = XML\n"
3045
3046 "def XMLID(text):\n" /* public */
3047 " tree = XML(text)\n"
3048 " ids = {}\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003049 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003050 " id = elem.get('id')\n"
3051 " if id:\n"
3052 " ids[id] = elem\n"
3053 " return tree, ids\n"
3054 "cElementTree.XMLID = XMLID\n"
3055
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003056 "try:\n"
3057 " register_namespace = ET.register_namespace\n"
3058 "except AttributeError:\n"
3059 " def register_namespace(prefix, uri):\n"
3060 " ET._namespace_map[uri] = prefix\n"
3061 "cElementTree.register_namespace = register_namespace\n"
3062
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003063 "cElementTree.dump = ET.dump\n"
3064 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3065 "cElementTree.iselement = ET.iselement\n"
3066 "cElementTree.QName = ET.QName\n"
3067 "cElementTree.tostring = ET.tostring\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003068 "cElementTree.fromstringlist = ET.fromstringlist\n"
3069 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003070 "cElementTree.VERSION = '" VERSION "'\n"
3071 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003072
3073 );
3074
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003075 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3076 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003077
3078 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3079
3080 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3081 if (elementtree_copyelement_obj) {
3082 /* reduce hack needed; enable reduce method */
3083 PyMethodDef* mp;
3084 for (mp = element_methods; mp->ml_name; mp++)
3085 if (mp->ml_meth == (PyCFunction) element_reduce) {
3086 mp->ml_name = "__reduce__";
3087 break;
3088 }
3089 } else
3090 PyErr_Clear();
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003091
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003092 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003093 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3094 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003095
3096#if defined(USE_PYEXPAT_CAPI)
3097 /* link against pyexpat, if possible */
Larry Hastings402b73f2010-03-25 00:54:54 +00003098 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003099 if (expat_capi) {
3100 /* check that it's usable */
3101 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3102 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3103 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3104 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3105 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3106 expat_capi = NULL;
3107 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003108#endif
3109
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003110 elementtree_parseerror_obj = PyErr_NewException(
3111 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3112 );
3113 Py_INCREF(elementtree_parseerror_obj);
3114 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003115}