/*
 * ElementTree
 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
 *
 * elementtree accelerator
 *
 * History:
 * 1999-06-20 fl  created (as part of sgmlop)
 * 2001-05-29 fl  effdom edition
 * 2003-02-27 fl  elementtree edition (alpha)
 * 2004-06-03 fl  updates for elementtree 1.2
 * 2005-01-05 fl  major optimization effort
 * 2005-01-11 fl  first public release (cElementTree 0.8)
 * 2005-01-12 fl  split element object into base and extras
 * 2005-01-13 fl  use tagged pointers for tail/text (cElementTree 0.9)
 * 2005-01-17 fl  added treebuilder close method
 * 2005-01-17 fl  fixed crash in getchildren
 * 2005-01-18 fl  removed observer api, added iterparse (cElementTree 0.9.3)
 * 2005-01-23 fl  revised iterparse api; added namespace event support (0.9.8)
 * 2005-01-26 fl  added VERSION module property (cElementTree 1.0)
 * 2005-01-28 fl  added remove method (1.0.1)
 * 2005-03-01 fl  added iselement function; fixed makeelement aliasing (1.0.2)
 * 2005-03-13 fl  export Comment and ProcessingInstruction/PI helpers
 * 2005-03-26 fl  added Comment and PI support to XMLParser
 * 2005-03-27 fl  event optimizations; complain about bogus events
 * 2005-08-08 fl  fixed read error handling in parse
 * 2005-08-11 fl  added runtime test for copy workaround (1.0.3)
 * 2005-12-13 fl  added expat_capi support (for xml.etree) (1.0.4)
 * 2005-12-16 fl  added support for non-standard encodings
 * 2006-03-08 fl  fixed a couple of potential null-refs and leaks
 * 2006-03-12 fl  merge in 2.5 ssize_t changes
 * 2007-08-25 fl  call custom builder's close method from XMLParser
 * 2007-08-31 fl  added iter, extend from ET 1.3
 * 2007-09-01 fl  fixed ParseError exception, setslice source type, etc
 * 2007-09-03 fl  fixed handling of negative insert indexes
 * 2007-09-04 fl  added itertext from ET 1.3
 * 2007-09-06 fl  added position attribute to ParseError exception
 * 2008-06-06 fl  delay error reporting in iterparse (from Hrvoje Niksic)
 *
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 */

/* Licensed to PSF under a Contributor Agreement. */
/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Keep this defined to include the expat-based XMLParser type. */
#define USE_EXPAT

/* Define this to do all expat calls via pyexpat's embedded expat
   library. */
/* #define USE_PYEXPAT_CAPI */

/* Number of children an element can hold without extra memory
   allocations. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current version of cElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
#if 0
/* debugging aid: keep a running byte count and print it on every
   allocation/release event */
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
/* regular builds: the bookkeeping macros expand to nothing */
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
96
/* compatibility macros */

/* object header accessors for interpreters older than 2.6 */
#if PY_VERSION_HEX < 0x02060000
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* interpreters older than 2.5 have no Py_ssize_t or lenfunc */
#if PY_VERSION_HEX < 0x02050000
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

/* interpreters older than 2.4 lack PyDict_CheckExact and possibly
   Py_RETURN_NONE */
#if PY_VERSION_HEX < 0x02040000
#define PyDict_CheckExact PyDict_Check

#ifndef Py_RETURN_NONE
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000115
/* The 'join' flag is kept in the lowest bit of the text/tail object
   pointers.  Every use of text or tail as an object pointer must go
   through JOIN_OBJ; see the comments in the ElementObject definition
   for more info.

   JOIN_OBJ(p)        the object pointer with the flag bit cleared
   JOIN_GET(p)        the flag bit (0 or 1)
   JOIN_SET(p, flag)  the object pointer of p combined with flag */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
123
124/* glue functions (see the init function for details) */
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000125static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000128static PyObject* elementtree_iter_obj;
129static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130static PyObject* elementpath_obj;
131
132/* helpers */
133
134LOCAL(PyObject*)
135deepcopy(PyObject* object, PyObject* memo)
136{
137 /* do a deep copy of the given object */
138
139 PyObject* args;
140 PyObject* result;
141
142 if (!elementtree_deepcopy_obj) {
143 PyErr_SetString(
144 PyExc_RuntimeError,
145 "deepcopy helper not found"
146 );
147 return NULL;
148 }
149
150 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000151 if (!args)
152 return NULL;
153
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
155 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156
157 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158
159 Py_DECREF(args);
160
161 return result;
162}
163
164LOCAL(PyObject*)
165list_join(PyObject* list)
166{
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300167 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168 PyObject* joiner;
169 PyObject* function;
170 PyObject* args;
171 PyObject* result;
172
173 switch (PyList_GET_SIZE(list)) {
174 case 0:
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000175 return PyString_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000176 case 1:
177 result = PyList_GET_ITEM(list, 0);
178 Py_INCREF(result);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179 return result;
180 }
181
182 /* two or more elements: slice out a suitable separator from the
183 first member, and use that to join the entire list */
184
185 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
186 if (!joiner)
187 return NULL;
188
189 function = PyObject_GetAttrString(joiner, "join");
190 if (!function) {
191 Py_DECREF(joiner);
192 return NULL;
193 }
194
195 args = PyTuple_New(1);
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300196 if (!args) {
197 Py_DECREF(function);
198 Py_DECREF(joiner);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000199 return NULL;
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300200 }
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000201
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300202 Py_INCREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203 PyTuple_SET_ITEM(args, 0, list);
204
205 result = PyObject_CallObject(function, args);
206
207 Py_DECREF(args); /* also removes list */
208 Py_DECREF(function);
209 Py_DECREF(joiner);
210
211 return result;
212}
213
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000214/* -------------------------------------------------------------------- */
215/* the element type */
216
217typedef struct {
218
219 /* attributes (a dictionary object), or None if no attributes */
220 PyObject* attrib;
221
222 /* child elements */
223 int length; /* actual number of items */
224 int allocated; /* allocated items */
225
226 /* this either points to _children or to a malloced buffer */
227 PyObject* *children;
228
229 PyObject* _children[STATIC_CHILDREN];
230
231} ElementObjectExtra;
232
233typedef struct {
234 PyObject_HEAD
235
236 /* element tag (a string). */
237 PyObject* tag;
238
239 /* text before first child. note that this is a tagged pointer;
240 use JOIN_OBJ to get the object pointer. the join flag is used
241 to distinguish lists created by the tree builder from lists
242 assigned to the attribute by application code; the former
243 should be joined before being returned to the user, the latter
244 should be left intact. */
245 PyObject* text;
246
247 /* text after this element, in parent. note that this is a tagged
248 pointer; use JOIN_OBJ to get the object pointer. */
249 PyObject* tail;
250
251 ElementObjectExtra* extra;
252
253} ElementObject;
254
255staticforward PyTypeObject Element_Type;
256
Christian Heimese93237d2007-12-19 02:37:44 +0000257#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258
259/* -------------------------------------------------------------------- */
260/* element constructor and destructor */
261
262LOCAL(int)
263element_new_extra(ElementObject* self, PyObject* attrib)
264{
265 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
266 if (!self->extra)
267 return -1;
268
269 if (!attrib)
270 attrib = Py_None;
271
272 Py_INCREF(attrib);
273 self->extra->attrib = attrib;
274
275 self->extra->length = 0;
276 self->extra->allocated = STATIC_CHILDREN;
277 self->extra->children = self->extra->_children;
278
279 return 0;
280}
281
282LOCAL(void)
283element_dealloc_extra(ElementObject* self)
284{
285 int i;
286
287 Py_DECREF(self->extra->attrib);
288
289 for (i = 0; i < self->extra->length; i++)
290 Py_DECREF(self->extra->children[i]);
291
292 if (self->extra->children != self->extra->_children)
293 PyObject_Free(self->extra->children);
294
295 PyObject_Free(self->extra);
296}
297
298LOCAL(PyObject*)
299element_new(PyObject* tag, PyObject* attrib)
300{
301 ElementObject* self;
302
303 self = PyObject_New(ElementObject, &Element_Type);
304 if (self == NULL)
305 return NULL;
306
307 /* use None for empty dictionaries */
308 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
309 attrib = Py_None;
310
311 self->extra = NULL;
312
313 if (attrib != Py_None) {
314
Neal Norwitzc6a989a2006-05-10 06:57:58 +0000315 if (element_new_extra(self, attrib) < 0) {
316 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000317 return NULL;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000318 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000319
320 self->extra->length = 0;
321 self->extra->allocated = STATIC_CHILDREN;
322 self->extra->children = self->extra->_children;
323
324 }
325
326 Py_INCREF(tag);
327 self->tag = tag;
328
329 Py_INCREF(Py_None);
330 self->text = Py_None;
331
332 Py_INCREF(Py_None);
333 self->tail = Py_None;
334
335 ALLOC(sizeof(ElementObject), "create element");
336
337 return (PyObject*) self;
338}
339
340LOCAL(int)
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200341element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000342{
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200343 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000344 PyObject* *children;
345
346 /* make sure self->children can hold the given number of extra
347 elements. set an exception and return -1 if allocation failed */
348
349 if (!self->extra)
350 element_new_extra(self, NULL);
351
352 size = self->extra->length + extra;
353
354 if (size > self->extra->allocated) {
355 /* use Python 2.4's list growth strategy */
356 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000357 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
358 * which needs at least 4 bytes.
359 * Although it's a false alarm always assume at least one child to
360 * be safe.
361 */
362 size = size ? size : 1;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200363 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
364 goto nomemory;
365 if (size > INT_MAX) {
366 PyErr_SetString(PyExc_OverflowError,
367 "too many children");
368 return -1;
369 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000370 if (self->extra->children != self->extra->_children) {
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000371 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
372 * "children", which needs at least 4 bytes. Although it's a
373 * false alarm always assume at least one child to be safe.
374 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000375 children = PyObject_Realloc(self->extra->children,
376 size * sizeof(PyObject*));
377 if (!children)
378 goto nomemory;
379 } else {
380 children = PyObject_Malloc(size * sizeof(PyObject*));
381 if (!children)
382 goto nomemory;
383 /* copy existing children from static area to malloc buffer */
384 memcpy(children, self->extra->children,
385 self->extra->length * sizeof(PyObject*));
386 }
387 self->extra->children = children;
388 self->extra->allocated = size;
389 }
390
391 return 0;
392
393 nomemory:
394 PyErr_NoMemory();
395 return -1;
396}
397
398LOCAL(int)
399element_add_subelement(ElementObject* self, PyObject* element)
400{
401 /* add a child element to a parent */
402
403 if (element_resize(self, 1) < 0)
404 return -1;
405
406 Py_INCREF(element);
407 self->extra->children[self->extra->length] = element;
408
409 self->extra->length++;
410
411 return 0;
412}
413
414LOCAL(PyObject*)
415element_get_attrib(ElementObject* self)
416{
417 /* return borrowed reference to attrib dictionary */
418 /* note: this function assumes that the extra section exists */
419
420 PyObject* res = self->extra->attrib;
421
422 if (res == Py_None) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000423 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000424 /* create missing dictionary */
425 res = PyDict_New();
426 if (!res)
427 return NULL;
428 self->extra->attrib = res;
429 }
430
431 return res;
432}
433
434LOCAL(PyObject*)
435element_get_text(ElementObject* self)
436{
437 /* return borrowed reference to text attribute */
438
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300439 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440
441 if (JOIN_GET(res)) {
442 res = JOIN_OBJ(res);
443 if (PyList_CheckExact(res)) {
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300444 PyObject *tmp = list_join(res);
445 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000446 return NULL;
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300447 self->text = tmp;
448 Py_DECREF(res);
449 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000450 }
451 }
452
453 return res;
454}
455
456LOCAL(PyObject*)
457element_get_tail(ElementObject* self)
458{
459 /* return borrowed reference to text attribute */
460
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300461 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000462
463 if (JOIN_GET(res)) {
464 res = JOIN_OBJ(res);
465 if (PyList_CheckExact(res)) {
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300466 PyObject *tmp = list_join(res);
467 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 return NULL;
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300469 self->tail = tmp;
470 Py_DECREF(res);
471 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000472 }
473 }
474
475 return res;
476}
477
478static PyObject*
479element(PyObject* self, PyObject* args, PyObject* kw)
480{
481 PyObject* elem;
482
483 PyObject* tag;
484 PyObject* attrib = NULL;
485 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
486 &PyDict_Type, &attrib))
487 return NULL;
488
489 if (attrib || kw) {
490 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
491 if (!attrib)
492 return NULL;
493 if (kw)
494 PyDict_Update(attrib, kw);
495 } else {
496 Py_INCREF(Py_None);
497 attrib = Py_None;
498 }
499
500 elem = element_new(tag, attrib);
501
502 Py_DECREF(attrib);
503
504 return elem;
505}
506
507static PyObject*
508subelement(PyObject* self, PyObject* args, PyObject* kw)
509{
510 PyObject* elem;
511
512 ElementObject* parent;
513 PyObject* tag;
514 PyObject* attrib = NULL;
515 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
516 &Element_Type, &parent, &tag,
517 &PyDict_Type, &attrib))
518 return NULL;
519
520 if (attrib || kw) {
521 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
522 if (!attrib)
523 return NULL;
524 if (kw)
525 PyDict_Update(attrib, kw);
526 } else {
527 Py_INCREF(Py_None);
528 attrib = Py_None;
529 }
530
531 elem = element_new(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000532 Py_DECREF(attrib);
Xiang Zhang9c0408d2017-03-22 14:32:52 +0800533 if (elem == NULL)
534 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000535
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000536 if (element_add_subelement(parent, elem) < 0) {
537 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000538 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000539 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000540
541 return elem;
542}
543
544static void
545element_dealloc(ElementObject* self)
546{
Serhiy Storchaka14518742016-12-28 09:23:17 +0200547 if (self->extra)
548 element_dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000549
550 /* discard attributes */
551 Py_DECREF(self->tag);
Serhiy Storchaka14518742016-12-28 09:23:17 +0200552 Py_DECREF(JOIN_OBJ(self->text));
553 Py_DECREF(JOIN_OBJ(self->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000554
555 RELEASE(sizeof(ElementObject), "destroy element");
556
557 PyObject_Del(self);
558}
559
560/* -------------------------------------------------------------------- */
561/* methods (in alphabetical order) */
562
563static PyObject*
564element_append(ElementObject* self, PyObject* args)
565{
566 PyObject* element;
567 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
568 return NULL;
569
570 if (element_add_subelement(self, element) < 0)
571 return NULL;
572
573 Py_RETURN_NONE;
574}
575
576static PyObject*
577element_clear(ElementObject* self, PyObject* args)
578{
579 if (!PyArg_ParseTuple(args, ":clear"))
580 return NULL;
581
582 if (self->extra) {
583 element_dealloc_extra(self);
584 self->extra = NULL;
585 }
586
587 Py_INCREF(Py_None);
588 Py_DECREF(JOIN_OBJ(self->text));
589 self->text = Py_None;
590
591 Py_INCREF(Py_None);
592 Py_DECREF(JOIN_OBJ(self->tail));
593 self->tail = Py_None;
594
595 Py_RETURN_NONE;
596}
597
598static PyObject*
599element_copy(ElementObject* self, PyObject* args)
600{
601 int i;
602 ElementObject* element;
603
604 if (!PyArg_ParseTuple(args, ":__copy__"))
605 return NULL;
606
607 element = (ElementObject*) element_new(
608 self->tag, (self->extra) ? self->extra->attrib : Py_None
609 );
610 if (!element)
611 return NULL;
612
613 Py_DECREF(JOIN_OBJ(element->text));
614 element->text = self->text;
615 Py_INCREF(JOIN_OBJ(element->text));
616
617 Py_DECREF(JOIN_OBJ(element->tail));
618 element->tail = self->tail;
619 Py_INCREF(JOIN_OBJ(element->tail));
620
621 if (self->extra) {
622
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000623 if (element_resize(element, self->extra->length) < 0) {
624 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000626 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627
628 for (i = 0; i < self->extra->length; i++) {
629 Py_INCREF(self->extra->children[i]);
630 element->extra->children[i] = self->extra->children[i];
631 }
632
633 element->extra->length = self->extra->length;
634
635 }
636
637 return (PyObject*) element;
638}
639
640static PyObject*
641element_deepcopy(ElementObject* self, PyObject* args)
642{
643 int i;
644 ElementObject* element;
645 PyObject* tag;
646 PyObject* attrib;
647 PyObject* text;
648 PyObject* tail;
649 PyObject* id;
650
651 PyObject* memo;
652 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
653 return NULL;
654
655 tag = deepcopy(self->tag, memo);
656 if (!tag)
657 return NULL;
658
659 if (self->extra) {
660 attrib = deepcopy(self->extra->attrib, memo);
661 if (!attrib) {
662 Py_DECREF(tag);
663 return NULL;
664 }
665 } else {
666 Py_INCREF(Py_None);
667 attrib = Py_None;
668 }
669
670 element = (ElementObject*) element_new(tag, attrib);
671
672 Py_DECREF(tag);
673 Py_DECREF(attrib);
674
675 if (!element)
676 return NULL;
677
678 text = deepcopy(JOIN_OBJ(self->text), memo);
679 if (!text)
680 goto error;
681 Py_DECREF(element->text);
682 element->text = JOIN_SET(text, JOIN_GET(self->text));
683
684 tail = deepcopy(JOIN_OBJ(self->tail), memo);
685 if (!tail)
686 goto error;
687 Py_DECREF(element->tail);
688 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
689
690 if (self->extra) {
691
692 if (element_resize(element, self->extra->length) < 0)
693 goto error;
694
695 for (i = 0; i < self->extra->length; i++) {
696 PyObject* child = deepcopy(self->extra->children[i], memo);
697 if (!child) {
698 element->extra->length = i;
699 goto error;
700 }
701 element->extra->children[i] = child;
702 }
703
704 element->extra->length = self->extra->length;
705
706 }
707
708 /* add object to memo dictionary (so deepcopy won't visit it again) */
709 id = PyInt_FromLong((Py_uintptr_t) self);
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000710 if (!id)
711 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000712
713 i = PyDict_SetItem(memo, id, (PyObject*) element);
714
715 Py_DECREF(id);
716
717 if (i < 0)
718 goto error;
719
720 return (PyObject*) element;
721
722 error:
723 Py_DECREF(element);
724 return NULL;
725}
726
727LOCAL(int)
728checkpath(PyObject* tag)
729{
Neal Norwitzc7074382006-06-12 02:06:17 +0000730 Py_ssize_t i;
731 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000732
733 /* check if a tag contains an xpath character */
734
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000735#define PATHCHAR(ch) \
736 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000737
738#if defined(Py_USING_UNICODE)
739 if (PyUnicode_Check(tag)) {
740 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
741 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
742 if (p[i] == '{')
743 check = 0;
744 else if (p[i] == '}')
745 check = 1;
746 else if (check && PATHCHAR(p[i]))
747 return 1;
748 }
749 return 0;
750 }
751#endif
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000752 if (PyString_Check(tag)) {
753 char *p = PyString_AS_STRING(tag);
754 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755 if (p[i] == '{')
756 check = 0;
757 else if (p[i] == '}')
758 check = 1;
759 else if (check && PATHCHAR(p[i]))
760 return 1;
761 }
762 return 0;
763 }
764
765 return 1; /* unknown type; might be path expression */
766}
767
768static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000769element_extend(ElementObject* self, PyObject* args)
770{
771 PyObject* seq;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300772 Py_ssize_t i;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000773
774 PyObject* seq_in;
775 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
776 return NULL;
777
778 seq = PySequence_Fast(seq_in, "");
779 if (!seq) {
780 PyErr_Format(
781 PyExc_TypeError,
782 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
783 );
784 return NULL;
785 }
786
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300787 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000788 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
789 if (element_add_subelement(self, element) < 0) {
790 Py_DECREF(seq);
791 return NULL;
792 }
793 }
794
795 Py_DECREF(seq);
796
797 Py_RETURN_NONE;
798}
799
800static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000801element_find(ElementObject* self, PyObject* args)
802{
803 int i;
804
805 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000806 PyObject* namespaces = Py_None;
807 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000808 return NULL;
809
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000810 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000811 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000812 elementpath_obj, "find", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000813 );
814
815 if (!self->extra)
816 Py_RETURN_NONE;
817
818 for (i = 0; i < self->extra->length; i++) {
819 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300820 int rc;
821 if (!Element_CheckExact(item))
822 continue;
823 Py_INCREF(item);
824 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
825 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826 return item;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300827 Py_DECREF(item);
828 if (rc < 0 && PyErr_Occurred())
829 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000830 }
831
832 Py_RETURN_NONE;
833}
834
835static PyObject*
836element_findtext(ElementObject* self, PyObject* args)
837{
838 int i;
839
840 PyObject* tag;
841 PyObject* default_value = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000842 PyObject* namespaces = Py_None;
843 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000844 return NULL;
845
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000846 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000847 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000848 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000849 );
850
851 if (!self->extra) {
852 Py_INCREF(default_value);
853 return default_value;
854 }
855
856 for (i = 0; i < self->extra->length; i++) {
857 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300858 int rc;
859 if (!Element_CheckExact(item))
860 continue;
861 Py_INCREF(item);
862 rc = PyObject_Compare(item->tag, tag);
863 if (rc == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000864 PyObject* text = element_get_text(item);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300865 if (text == Py_None) {
866 Py_DECREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000867 return PyString_FromString("");
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300868 }
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000869 Py_XINCREF(text);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300870 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000871 return text;
872 }
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300873 Py_DECREF(item);
874 if (rc < 0 && PyErr_Occurred())
875 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000876 }
877
878 Py_INCREF(default_value);
879 return default_value;
880}
881
882static PyObject*
883element_findall(ElementObject* self, PyObject* args)
884{
885 int i;
886 PyObject* out;
887
888 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000889 PyObject* namespaces = Py_None;
890 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000891 return NULL;
892
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000893 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000894 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000895 elementpath_obj, "findall", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000896 );
897
898 out = PyList_New(0);
899 if (!out)
900 return NULL;
901
902 if (!self->extra)
903 return out;
904
905 for (i = 0; i < self->extra->length; i++) {
906 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300907 int rc;
908 if (!Element_CheckExact(item))
909 continue;
910 Py_INCREF(item);
911 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
912 if (rc == 0)
913 rc = PyList_Append(out, item);
914 Py_DECREF(item);
915 if (rc < 0 && PyErr_Occurred()) {
916 Py_DECREF(out);
917 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000918 }
919 }
920
921 return out;
922}
923
924static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000925element_iterfind(ElementObject* self, PyObject* args)
926{
927 PyObject* tag;
928 PyObject* namespaces = Py_None;
929 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
930 return NULL;
931
932 return PyObject_CallMethod(
933 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
934 );
935}
936
937static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000938element_get(ElementObject* self, PyObject* args)
939{
940 PyObject* value;
941
942 PyObject* key;
943 PyObject* default_value = Py_None;
944 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
945 return NULL;
946
947 if (!self->extra || self->extra->attrib == Py_None)
948 value = default_value;
949 else {
950 value = PyDict_GetItem(self->extra->attrib, key);
951 if (!value)
952 value = default_value;
953 }
954
955 Py_INCREF(value);
956 return value;
957}
958
959static PyObject*
960element_getchildren(ElementObject* self, PyObject* args)
961{
962 int i;
963 PyObject* list;
964
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000965 /* FIXME: report as deprecated? */
966
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000967 if (!PyArg_ParseTuple(args, ":getchildren"))
968 return NULL;
969
970 if (!self->extra)
971 return PyList_New(0);
972
973 list = PyList_New(self->extra->length);
974 if (!list)
975 return NULL;
976
977 for (i = 0; i < self->extra->length; i++) {
978 PyObject* item = self->extra->children[i];
979 Py_INCREF(item);
980 PyList_SET_ITEM(list, i, item);
981 }
982
983 return list;
984}
985
986static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000987element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000988{
989 PyObject* result;
990
991 PyObject* tag = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000992 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000993 return NULL;
994
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000995 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000996 PyErr_SetString(
997 PyExc_RuntimeError,
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000998 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000999 );
1000 return NULL;
1001 }
1002
1003 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001004 if (!args)
1005 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001006
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001007 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1008 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1009
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001010 result = PyObject_CallObject(elementtree_iter_obj, args);
1011
1012 Py_DECREF(args);
1013
1014 return result;
1015}
1016
1017
1018static PyObject*
1019element_itertext(ElementObject* self, PyObject* args)
1020{
1021 PyObject* result;
1022
1023 if (!PyArg_ParseTuple(args, ":itertext"))
1024 return NULL;
1025
1026 if (!elementtree_itertext_obj) {
1027 PyErr_SetString(
1028 PyExc_RuntimeError,
1029 "itertext helper not found"
1030 );
1031 return NULL;
1032 }
1033
1034 args = PyTuple_New(1);
1035 if (!args)
1036 return NULL;
1037
1038 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1039
1040 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001041
1042 Py_DECREF(args);
1043
1044 return result;
1045}
1046
1047static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001048element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001049{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001050 ElementObject* self = (ElementObject*) self_;
1051
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001052 if (!self->extra || index < 0 || index >= self->extra->length) {
1053 PyErr_SetString(
1054 PyExc_IndexError,
1055 "child index out of range"
1056 );
1057 return NULL;
1058 }
1059
1060 Py_INCREF(self->extra->children[index]);
1061 return self->extra->children[index];
1062}
1063
1064static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001065element_insert(ElementObject* self, PyObject* args)
1066{
1067 int i;
1068
1069 int index;
1070 PyObject* element;
1071 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1072 &Element_Type, &element))
1073 return NULL;
1074
1075 if (!self->extra)
1076 element_new_extra(self, NULL);
1077
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001078 if (index < 0) {
1079 index += self->extra->length;
1080 if (index < 0)
1081 index = 0;
1082 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001083 if (index > self->extra->length)
1084 index = self->extra->length;
1085
1086 if (element_resize(self, 1) < 0)
1087 return NULL;
1088
1089 for (i = self->extra->length; i > index; i--)
1090 self->extra->children[i] = self->extra->children[i-1];
1091
1092 Py_INCREF(element);
1093 self->extra->children[index] = element;
1094
1095 self->extra->length++;
1096
1097 Py_RETURN_NONE;
1098}
1099
1100static PyObject*
1101element_items(ElementObject* self, PyObject* args)
1102{
1103 if (!PyArg_ParseTuple(args, ":items"))
1104 return NULL;
1105
1106 if (!self->extra || self->extra->attrib == Py_None)
1107 return PyList_New(0);
1108
1109 return PyDict_Items(self->extra->attrib);
1110}
1111
1112static PyObject*
1113element_keys(ElementObject* self, PyObject* args)
1114{
1115 if (!PyArg_ParseTuple(args, ":keys"))
1116 return NULL;
1117
1118 if (!self->extra || self->extra->attrib == Py_None)
1119 return PyList_New(0);
1120
1121 return PyDict_Keys(self->extra->attrib);
1122}
1123
Martin v. Löwis18e16552006-02-15 17:27:45 +00001124static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001125element_length(ElementObject* self)
1126{
1127 if (!self->extra)
1128 return 0;
1129
1130 return self->extra->length;
1131}
1132
1133static PyObject*
1134element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1135{
1136 PyObject* elem;
1137
1138 PyObject* tag;
1139 PyObject* attrib;
1140 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1141 return NULL;
1142
1143 attrib = PyDict_Copy(attrib);
1144 if (!attrib)
1145 return NULL;
1146
1147 elem = element_new(tag, attrib);
1148
1149 Py_DECREF(attrib);
1150
1151 return elem;
1152}
1153
1154static PyObject*
1155element_reduce(ElementObject* self, PyObject* args)
1156{
1157 if (!PyArg_ParseTuple(args, ":__reduce__"))
1158 return NULL;
1159
1160 /* Hack alert: This method is used to work around a __copy__
1161 problem on certain 2.3 and 2.4 versions. To save time and
1162 simplify the code, we create the copy in here, and use a dummy
1163 copyelement helper to trick the copy module into doing the
1164 right thing. */
1165
1166 if (!elementtree_copyelement_obj) {
1167 PyErr_SetString(
1168 PyExc_RuntimeError,
1169 "copyelement helper not found"
1170 );
1171 return NULL;
1172 }
1173
1174 return Py_BuildValue(
1175 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1176 );
1177}
1178
1179static PyObject*
1180element_remove(ElementObject* self, PyObject* args)
1181{
1182 int i;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001183 int rc;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001184 PyObject* element;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001185 PyObject* found;
1186
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001187 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1188 return NULL;
1189
1190 if (!self->extra) {
1191 /* element has no children, so raise exception */
1192 PyErr_SetString(
1193 PyExc_ValueError,
1194 "list.remove(x): x not in list"
1195 );
1196 return NULL;
1197 }
1198
1199 for (i = 0; i < self->extra->length; i++) {
1200 if (self->extra->children[i] == element)
1201 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001202 rc = PyObject_Compare(self->extra->children[i], element);
1203 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001204 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001205 if (rc < 0 && PyErr_Occurred())
1206 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001207 }
1208
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001209 if (i >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001210 /* element is not in children, so raise exception */
1211 PyErr_SetString(
1212 PyExc_ValueError,
1213 "list.remove(x): x not in list"
1214 );
1215 return NULL;
1216 }
1217
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001218 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219
1220 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001221 for (; i < self->extra->length; i++)
1222 self->extra->children[i] = self->extra->children[i+1];
1223
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001224 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001225 Py_RETURN_NONE;
1226}
1227
1228static PyObject*
1229element_repr(ElementObject* self)
1230{
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001231 int status;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001232
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001233 if (self->tag == NULL)
1234 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001235
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001236 status = Py_ReprEnter((PyObject *)self);
1237 if (status == 0) {
1238 PyObject *repr, *tag;
1239 tag = PyObject_Repr(self->tag);
1240 if (!tag)
1241 return NULL;
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001242
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001243 repr = PyString_FromFormat("<Element %s at %p>",
1244 PyString_AS_STRING(tag), self);
Benjamin Petersond7324bc2016-12-03 11:30:04 -08001245 Py_ReprLeave((PyObject *)self);
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001246 Py_DECREF(tag);
1247 return repr;
1248 }
1249 if (status > 0)
1250 PyErr_Format(PyExc_RuntimeError,
1251 "reentrant call inside %s.__repr__",
1252 Py_TYPE(self)->tp_name);
1253 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254}
1255
1256static PyObject*
1257element_set(ElementObject* self, PyObject* args)
1258{
1259 PyObject* attrib;
1260
1261 PyObject* key;
1262 PyObject* value;
1263 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1264 return NULL;
1265
1266 if (!self->extra)
1267 element_new_extra(self, NULL);
1268
1269 attrib = element_get_attrib(self);
1270 if (!attrib)
1271 return NULL;
1272
1273 if (PyDict_SetItem(attrib, key, value) < 0)
1274 return NULL;
1275
1276 Py_RETURN_NONE;
1277}
1278
1279static int
Serhiy Storchakab5b76c32015-11-26 11:21:47 +02001280element_setitem(PyObject* self_, Py_ssize_t index_, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001281{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001282 ElementObject* self = (ElementObject*) self_;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001283 int i, index;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001284 PyObject* old;
1285
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001286 if (!self->extra || index_ < 0 || index_ >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001287 PyErr_SetString(
1288 PyExc_IndexError,
1289 "child assignment index out of range");
1290 return -1;
1291 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001292 index = (int)index_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001293
1294 old = self->extra->children[index];
1295
1296 if (item) {
1297 Py_INCREF(item);
1298 self->extra->children[index] = item;
1299 } else {
1300 self->extra->length--;
1301 for (i = index; i < self->extra->length; i++)
1302 self->extra->children[i] = self->extra->children[i+1];
1303 }
1304
1305 Py_DECREF(old);
1306
1307 return 0;
1308}
1309
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001310static PyObject*
1311element_subscr(PyObject* self_, PyObject* item)
1312{
1313 ElementObject* self = (ElementObject*) self_;
1314
1315#if (PY_VERSION_HEX < 0x02050000)
1316 if (PyInt_Check(item) || PyLong_Check(item)) {
1317 long i = PyInt_AsLong(item);
1318#else
1319 if (PyIndex_Check(item)) {
1320 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1321#endif
1322
1323 if (i == -1 && PyErr_Occurred()) {
1324 return NULL;
1325 }
1326 if (i < 0 && self->extra)
1327 i += self->extra->length;
1328 return element_getitem(self_, i);
1329 }
1330 else if (PySlice_Check(item)) {
1331 Py_ssize_t start, stop, step, slicelen, cur, i;
1332 PyObject* list;
1333
1334 if (!self->extra)
1335 return PyList_New(0);
1336
Serhiy Storchaka5e793212017-04-15 20:11:12 +03001337 if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001338 return NULL;
1339 }
Serhiy Storchakae41390a2017-04-08 11:48:57 +03001340 slicelen = _PySlice_AdjustIndices(self->extra->length, &start, &stop,
1341 step);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001342
1343 if (slicelen <= 0)
1344 return PyList_New(0);
1345 else {
1346 list = PyList_New(slicelen);
1347 if (!list)
1348 return NULL;
1349
1350 for (cur = start, i = 0; i < slicelen;
1351 cur += step, i++) {
1352 PyObject* item = self->extra->children[cur];
1353 Py_INCREF(item);
1354 PyList_SET_ITEM(list, i, item);
1355 }
1356
1357 return list;
1358 }
1359 }
1360 else {
1361 PyErr_SetString(PyExc_TypeError,
1362 "element indices must be integers");
1363 return NULL;
1364 }
1365}
1366
1367static int
1368element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1369{
1370 ElementObject* self = (ElementObject*) self_;
1371
1372#if (PY_VERSION_HEX < 0x02050000)
1373 if (PyInt_Check(item) || PyLong_Check(item)) {
1374 long i = PyInt_AsLong(item);
1375#else
1376 if (PyIndex_Check(item)) {
1377 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1378#endif
1379
1380 if (i == -1 && PyErr_Occurred()) {
1381 return -1;
1382 }
1383 if (i < 0 && self->extra)
1384 i += self->extra->length;
1385 return element_setitem(self_, i, value);
1386 }
1387 else if (PySlice_Check(item)) {
1388 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1389
1390 PyObject* recycle = NULL;
1391 PyObject* seq = NULL;
1392
1393 if (!self->extra)
1394 element_new_extra(self, NULL);
1395
Serhiy Storchaka5e793212017-04-15 20:11:12 +03001396 if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001397 return -1;
1398 }
Serhiy Storchakae41390a2017-04-08 11:48:57 +03001399 slicelen = _PySlice_AdjustIndices(self->extra->length, &start, &stop,
1400 step);
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001401 assert(slicelen <= self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001402
1403 if (value == NULL)
1404 newlen = 0;
1405 else {
1406 seq = PySequence_Fast(value, "");
1407 if (!seq) {
1408 PyErr_Format(
1409 PyExc_TypeError,
1410 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1411 );
1412 return -1;
1413 }
1414 newlen = PySequence_Size(seq);
1415 }
1416
1417 if (step != 1 && newlen != slicelen)
1418 {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001419 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001420 PyErr_Format(PyExc_ValueError,
1421#if (PY_VERSION_HEX < 0x02050000)
1422 "attempt to assign sequence of size %d "
1423 "to extended slice of size %d",
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001424 (int)newlen, (int)slicelen
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001425#else
1426 "attempt to assign sequence of size %zd "
1427 "to extended slice of size %zd",
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001428 newlen, slicelen
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001429#endif
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001430 );
1431 return -1;
1432 }
1433
1434
1435 /* Resize before creating the recycle bin, to prevent refleaks. */
1436 if (newlen > slicelen) {
1437 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001438 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001439 return -1;
1440 }
1441 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001442 assert(newlen - slicelen <= INT_MAX - self->extra->length);
1443 assert(newlen - slicelen >= -self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001444
1445 if (slicelen > 0) {
1446 /* to avoid recursive calls to this method (via decref), move
1447 old items to the recycle bin here, and get rid of them when
1448 we're done modifying the element */
1449 recycle = PyList_New(slicelen);
1450 if (!recycle) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001451 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001452 return -1;
1453 }
1454 for (cur = start, i = 0; i < slicelen;
1455 cur += step, i++)
1456 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1457 }
1458
1459 if (newlen < slicelen) {
1460 /* delete slice */
1461 for (i = stop; i < self->extra->length; i++)
1462 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1463 } else if (newlen > slicelen) {
1464 /* insert slice */
1465 for (i = self->extra->length-1; i >= stop; i--)
1466 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1467 }
1468
1469 /* replace the slice */
1470 for (cur = start, i = 0; i < newlen;
1471 cur += step, i++) {
1472 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1473 Py_INCREF(element);
1474 self->extra->children[cur] = element;
1475 }
1476
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001477 self->extra->length += (int)(newlen - slicelen);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001478
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001479 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001480
1481 /* discard the recycle bin, and everything in it */
1482 Py_XDECREF(recycle);
1483
1484 return 0;
1485 }
1486 else {
1487 PyErr_SetString(PyExc_TypeError,
1488 "element indices must be integers");
1489 return -1;
1490 }
1491}
1492
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001493static PyMethodDef element_methods[] = {
1494
1495 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1496
1497 {"get", (PyCFunction) element_get, METH_VARARGS},
1498 {"set", (PyCFunction) element_set, METH_VARARGS},
1499
1500 {"find", (PyCFunction) element_find, METH_VARARGS},
1501 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1502 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1503
1504 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001505 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001506 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1507 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1508
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001509 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1510 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1511 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1512
1513 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001514 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1515
1516 {"items", (PyCFunction) element_items, METH_VARARGS},
1517 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1518
1519 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1520
1521 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1522 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1523
1524 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1525 C objects correctly, so we have to fake it using a __reduce__-
1526 based hack (see the element_reduce implementation above for
1527 details). */
1528
1529 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1530 using a runtime test to figure out if we need to fake things
1531 or now (see the init code below). The following entry is
1532 enabled only if the hack is needed. */
1533
1534 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1535
1536 {NULL, NULL}
1537};
1538
1539static PyObject*
1540element_getattr(ElementObject* self, char* name)
1541{
1542 PyObject* res;
1543
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001544 /* handle common attributes first */
1545 if (strcmp(name, "tag") == 0) {
1546 res = self->tag;
1547 Py_INCREF(res);
1548 return res;
1549 } else if (strcmp(name, "text") == 0) {
1550 res = element_get_text(self);
Xiang Zhang827c7832017-03-22 12:25:51 +08001551 Py_XINCREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001552 return res;
1553 }
1554
1555 /* methods */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001556 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1557 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001558 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001559
1560 PyErr_Clear();
1561
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001562 /* less common attributes */
1563 if (strcmp(name, "tail") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001564 res = element_get_tail(self);
1565 } else if (strcmp(name, "attrib") == 0) {
1566 if (!self->extra)
1567 element_new_extra(self, NULL);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001568 res = element_get_attrib(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001569 } else {
1570 PyErr_SetString(PyExc_AttributeError, name);
1571 return NULL;
1572 }
1573
1574 if (!res)
1575 return NULL;
1576
1577 Py_INCREF(res);
1578 return res;
1579}
1580
1581static int
1582element_setattr(ElementObject* self, const char* name, PyObject* value)
1583{
1584 if (value == NULL) {
1585 PyErr_SetString(
1586 PyExc_AttributeError,
1587 "can't delete element attributes"
1588 );
1589 return -1;
1590 }
1591
1592 if (strcmp(name, "tag") == 0) {
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001593 Py_INCREF(value);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001594 Py_SETREF(self->tag, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001595 } else if (strcmp(name, "text") == 0) {
1596 Py_DECREF(JOIN_OBJ(self->text));
1597 self->text = value;
1598 Py_INCREF(self->text);
1599 } else if (strcmp(name, "tail") == 0) {
1600 Py_DECREF(JOIN_OBJ(self->tail));
1601 self->tail = value;
1602 Py_INCREF(self->tail);
1603 } else if (strcmp(name, "attrib") == 0) {
1604 if (!self->extra)
1605 element_new_extra(self, NULL);
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001606 Py_INCREF(value);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001607 Py_SETREF(self->extra->attrib, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001608 } else {
1609 PyErr_SetString(PyExc_AttributeError, name);
1610 return -1;
1611 }
1612
1613 return 0;
1614}
1615
1616static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001617 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001618 0, /* sq_concat */
1619 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001620 element_getitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001621 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001622 element_setitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001623 0,
1624};
1625
1626static PyMappingMethods element_as_mapping = {
1627 (lenfunc) element_length,
1628 (binaryfunc) element_subscr,
1629 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001630};
1631
1632statichere PyTypeObject Element_Type = {
1633 PyObject_HEAD_INIT(NULL)
1634 0, "Element", sizeof(ElementObject), 0,
1635 /* methods */
1636 (destructor)element_dealloc, /* tp_dealloc */
1637 0, /* tp_print */
1638 (getattrfunc)element_getattr, /* tp_getattr */
1639 (setattrfunc)element_setattr, /* tp_setattr */
1640 0, /* tp_compare */
1641 (reprfunc)element_repr, /* tp_repr */
1642 0, /* tp_as_number */
1643 &element_as_sequence, /* tp_as_sequence */
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001644 &element_as_mapping, /* tp_as_mapping */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001645};
1646
1647/* ==================================================================== */
1648/* the tree builder type */
1649
1650typedef struct {
1651 PyObject_HEAD
1652
1653 PyObject* root; /* root node (first created node) */
1654
1655 ElementObject* this; /* current node */
1656 ElementObject* last; /* most recently created node */
1657
1658 PyObject* data; /* data collector (string or list), or NULL */
1659
1660 PyObject* stack; /* element stack */
Neal Norwitzc7074382006-06-12 02:06:17 +00001661 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001662
1663 /* element tracing */
1664 PyObject* events; /* list of events, or NULL if not collecting */
1665 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1666 PyObject* end_event_obj;
1667 PyObject* start_ns_event_obj;
1668 PyObject* end_ns_event_obj;
1669
1670} TreeBuilderObject;
1671
1672staticforward PyTypeObject TreeBuilder_Type;
1673
Christian Heimese93237d2007-12-19 02:37:44 +00001674#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001675
1676/* -------------------------------------------------------------------- */
1677/* constructor and destructor */
1678
1679LOCAL(PyObject*)
1680treebuilder_new(void)
1681{
1682 TreeBuilderObject* self;
1683
1684 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1685 if (self == NULL)
1686 return NULL;
1687
1688 self->root = NULL;
1689
1690 Py_INCREF(Py_None);
1691 self->this = (ElementObject*) Py_None;
1692
1693 Py_INCREF(Py_None);
1694 self->last = (ElementObject*) Py_None;
1695
1696 self->data = NULL;
1697
1698 self->stack = PyList_New(20);
1699 self->index = 0;
1700
1701 self->events = NULL;
1702 self->start_event_obj = self->end_event_obj = NULL;
1703 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1704
1705 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1706
1707 return (PyObject*) self;
1708}
1709
1710static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00001711treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001712{
1713 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1714 return NULL;
1715
1716 return treebuilder_new();
1717}
1718
1719static void
1720treebuilder_dealloc(TreeBuilderObject* self)
1721{
1722 Py_XDECREF(self->end_ns_event_obj);
1723 Py_XDECREF(self->start_ns_event_obj);
1724 Py_XDECREF(self->end_event_obj);
1725 Py_XDECREF(self->start_event_obj);
1726 Py_XDECREF(self->events);
1727 Py_DECREF(self->stack);
1728 Py_XDECREF(self->data);
1729 Py_DECREF(self->last);
1730 Py_DECREF(self->this);
1731 Py_XDECREF(self->root);
1732
1733 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1734
1735 PyObject_Del(self);
1736}
1737
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +03001738/* -------------------------------------------------------------------- */
1739/* helpers for handling of arbitrary element-like objects */
1740
1741static void
1742treebuilder_set_element_text_or_tail(PyObject **data, PyObject **dest)
1743{
1744 PyObject *tmp = JOIN_OBJ(*dest);
1745 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
1746 *data = NULL;
1747 Py_DECREF(tmp);
1748}
1749
1750LOCAL(void)
1751treebuilder_flush_data(TreeBuilderObject* self)
1752{
1753 ElementObject *element = self->last;
1754
1755 if (self->data) {
1756 if (self->this == element) {
1757 treebuilder_set_element_text_or_tail(
1758 &self->data,
1759 &element->text);
1760 }
1761 else {
1762 treebuilder_set_element_text_or_tail(
1763 &self->data,
1764 &element->tail);
1765 }
1766 }
1767}
1768
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001769LOCAL(int)
1770treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
1771 PyObject *node)
1772{
1773 if (action != NULL) {
1774 PyObject *res = PyTuple_Pack(2, action, node);
1775 if (res == NULL)
1776 return -1;
1777 if (PyList_Append(self->events, res) < 0) {
1778 Py_DECREF(res);
1779 return -1;
1780 }
1781 Py_DECREF(res);
1782 }
1783 return 0;
1784}
1785
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001786/* -------------------------------------------------------------------- */
1787/* handlers */
1788
1789LOCAL(PyObject*)
1790treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1791 PyObject* standalone)
1792{
1793 Py_RETURN_NONE;
1794}
1795
1796LOCAL(PyObject*)
1797treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1798 PyObject* attrib)
1799{
1800 PyObject* node;
1801 PyObject* this;
1802
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +03001803 treebuilder_flush_data(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001804
1805 node = element_new(tag, attrib);
1806 if (!node)
1807 return NULL;
1808
1809 this = (PyObject*) self->this;
1810
1811 if (this != Py_None) {
1812 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001813 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001814 } else {
1815 if (self->root) {
1816 PyErr_SetString(
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001817 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001818 "multiple elements on top level"
1819 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001820 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001821 }
1822 Py_INCREF(node);
1823 self->root = node;
1824 }
1825
1826 if (self->index < PyList_GET_SIZE(self->stack)) {
1827 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001828 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001829 Py_INCREF(this);
1830 } else {
1831 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001832 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001833 }
1834 self->index++;
1835
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001836 Py_INCREF(node);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001837 Py_SETREF(self->this, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001838
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001839 Py_INCREF(node);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001840 Py_SETREF(self->last, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001841
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001842 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
1843 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001844
1845 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001846
1847 error:
1848 Py_DECREF(node);
1849 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001850}
1851
1852LOCAL(PyObject*)
1853treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1854{
1855 if (!self->data) {
Fredrik Lundhdc075b92006-08-16 16:47:07 +00001856 if (self->last == (ElementObject*) Py_None) {
1857 /* ignore calls to data before the first call to start */
1858 Py_RETURN_NONE;
1859 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001860 /* store the first item as is */
1861 Py_INCREF(data); self->data = data;
1862 } else {
1863 /* more than one item; use a list to collect items */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001864 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1865 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001866 /* expat often generates single character data sections; handle
1867 the most common case by resizing the existing string... */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001868 Py_ssize_t size = PyString_GET_SIZE(self->data);
1869 if (_PyString_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001870 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001871 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001872 } else if (PyList_CheckExact(self->data)) {
1873 if (PyList_Append(self->data, data) < 0)
1874 return NULL;
1875 } else {
1876 PyObject* list = PyList_New(2);
1877 if (!list)
1878 return NULL;
1879 PyList_SET_ITEM(list, 0, self->data);
1880 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1881 self->data = list;
1882 }
1883 }
1884
1885 Py_RETURN_NONE;
1886}
1887
1888LOCAL(PyObject*)
1889treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1890{
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001891 ElementObject *item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001892
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +03001893 treebuilder_flush_data(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001894
1895 if (self->index == 0) {
1896 PyErr_SetString(
1897 PyExc_IndexError,
1898 "pop from empty stack"
1899 );
1900 return NULL;
1901 }
1902
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001903 item = self->last;
1904 self->last = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001905 self->index--;
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001906 self->this = (ElementObject *) PyList_GET_ITEM(self->stack, self->index);
1907 Py_INCREF(self->this);
1908 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001909
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001910 if (treebuilder_append_event(self, self->end_event_obj, (PyObject*)self->last) < 0)
1911 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001912
1913 Py_INCREF(self->last);
1914 return (PyObject*) self->last;
1915}
1916
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001917/* -------------------------------------------------------------------- */
1918/* methods (in alphabetical order) */
1919
1920static PyObject*
1921treebuilder_data(TreeBuilderObject* self, PyObject* args)
1922{
1923 PyObject* data;
1924 if (!PyArg_ParseTuple(args, "O:data", &data))
1925 return NULL;
1926
1927 return treebuilder_handle_data(self, data);
1928}
1929
1930static PyObject*
1931treebuilder_end(TreeBuilderObject* self, PyObject* args)
1932{
1933 PyObject* tag;
1934 if (!PyArg_ParseTuple(args, "O:end", &tag))
1935 return NULL;
1936
1937 return treebuilder_handle_end(self, tag);
1938}
1939
1940LOCAL(PyObject*)
1941treebuilder_done(TreeBuilderObject* self)
1942{
1943 PyObject* res;
1944
1945 /* FIXME: check stack size? */
1946
1947 if (self->root)
1948 res = self->root;
1949 else
1950 res = Py_None;
1951
1952 Py_INCREF(res);
1953 return res;
1954}
1955
1956static PyObject*
1957treebuilder_close(TreeBuilderObject* self, PyObject* args)
1958{
1959 if (!PyArg_ParseTuple(args, ":close"))
1960 return NULL;
1961
1962 return treebuilder_done(self);
1963}
1964
1965static PyObject*
1966treebuilder_start(TreeBuilderObject* self, PyObject* args)
1967{
1968 PyObject* tag;
1969 PyObject* attrib = Py_None;
1970 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1971 return NULL;
1972
1973 return treebuilder_handle_start(self, tag, attrib);
1974}
1975
1976static PyObject*
1977treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1978{
1979 PyObject* encoding;
1980 PyObject* standalone;
1981 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1982 return NULL;
1983
1984 return treebuilder_handle_xml(self, encoding, standalone);
1985}
1986
1987static PyMethodDef treebuilder_methods[] = {
1988 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1989 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1990 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1991 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1992 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1993 {NULL, NULL}
1994};
1995
1996static PyObject*
1997treebuilder_getattr(TreeBuilderObject* self, char* name)
1998{
1999 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
2000}
2001
2002statichere PyTypeObject TreeBuilder_Type = {
2003 PyObject_HEAD_INIT(NULL)
2004 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
2005 /* methods */
2006 (destructor)treebuilder_dealloc, /* tp_dealloc */
2007 0, /* tp_print */
2008 (getattrfunc)treebuilder_getattr, /* tp_getattr */
2009};
2010
2011/* ==================================================================== */
2012/* the expat interface */
2013
2014#if defined(USE_EXPAT)
2015
2016#include "expat.h"
2017
2018#if defined(USE_PYEXPAT_CAPI)
2019#include "pyexpat.h"
2020static struct PyExpat_CAPI* expat_capi;
2021#define EXPAT(func) (expat_capi->func)
2022#else
2023#define EXPAT(func) (XML_##func)
2024#endif
2025
2026typedef struct {
2027 PyObject_HEAD
2028
2029 XML_Parser parser;
2030
2031 PyObject* target;
2032 PyObject* entity;
2033
2034 PyObject* names;
2035
2036 PyObject* handle_xml;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002037
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002038 PyObject* handle_start;
2039 PyObject* handle_data;
2040 PyObject* handle_end;
2041
2042 PyObject* handle_comment;
2043 PyObject* handle_pi;
2044
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002045 PyObject* handle_close;
2046
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002047} XMLParserObject;
2048
2049staticforward PyTypeObject XMLParser_Type;
2050
2051/* helpers */
2052
2053#if defined(Py_USING_UNICODE)
2054LOCAL(int)
2055checkstring(const char* string, int size)
2056{
2057 int i;
2058
2059 /* check if an 8-bit string contains UTF-8 characters */
2060 for (i = 0; i < size; i++)
2061 if (string[i] & 0x80)
2062 return 1;
2063
2064 return 0;
2065}
2066#endif
2067
2068LOCAL(PyObject*)
2069makestring(const char* string, int size)
2070{
2071 /* convert a UTF-8 string to either a 7-bit ascii string or a
2072 Unicode string */
2073
2074#if defined(Py_USING_UNICODE)
2075 if (checkstring(string, size))
2076 return PyUnicode_DecodeUTF8(string, size, "strict");
2077#endif
2078
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002079 return PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002080}
2081
2082LOCAL(PyObject*)
2083makeuniversal(XMLParserObject* self, const char* string)
2084{
2085 /* convert a UTF-8 tag/attribute name from the expat parser
2086 to a universal name string */
2087
2088 int size = strlen(string);
2089 PyObject* key;
2090 PyObject* value;
2091
2092 /* look the 'raw' name up in the names dictionary */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002093 key = PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002094 if (!key)
2095 return NULL;
2096
2097 value = PyDict_GetItem(self->names, key);
2098
2099 if (value) {
2100 Py_INCREF(value);
2101 } else {
2102 /* new name. convert to universal name, and decode as
2103 necessary */
2104
2105 PyObject* tag;
2106 char* p;
2107 int i;
2108
2109 /* look for namespace separator */
2110 for (i = 0; i < size; i++)
2111 if (string[i] == '}')
2112 break;
2113 if (i != size) {
2114 /* convert to universal name */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002115 tag = PyString_FromStringAndSize(NULL, size+1);
2116 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002117 p[0] = '{';
2118 memcpy(p+1, string, size);
2119 size++;
2120 } else {
2121 /* plain name; use key as tag */
2122 Py_INCREF(key);
2123 tag = key;
2124 }
2125
2126 /* decode universal name */
2127#if defined(Py_USING_UNICODE)
2128 /* inline makestring, to avoid duplicating the source string if
Martin Panter6a8163a2016-04-15 02:14:19 +00002129 it's not a utf-8 string */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002130 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002131 if (checkstring(p, size)) {
2132 value = PyUnicode_DecodeUTF8(p, size, "strict");
2133 Py_DECREF(tag);
2134 if (!value) {
2135 Py_DECREF(key);
2136 return NULL;
2137 }
2138 } else
2139#endif
2140 value = tag; /* use tag as is */
2141
2142 /* add to names dictionary */
2143 if (PyDict_SetItem(self->names, key, value) < 0) {
2144 Py_DECREF(key);
2145 Py_DECREF(value);
2146 return NULL;
2147 }
2148 }
2149
2150 Py_DECREF(key);
2151 return value;
2152}
2153
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002154static void
2155expat_set_error(const char* message, int line, int column)
2156{
2157 PyObject *error;
2158 PyObject *position;
2159 char buffer[256];
2160
2161 sprintf(buffer, "%s: line %d, column %d", message, line, column);
2162
2163 error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
2164 if (!error)
2165 return;
2166
2167 /* add position attribute */
2168 position = Py_BuildValue("(ii)", line, column);
2169 if (!position) {
2170 Py_DECREF(error);
2171 return;
2172 }
2173 if (PyObject_SetAttrString(error, "position", position) == -1) {
2174 Py_DECREF(error);
2175 Py_DECREF(position);
2176 return;
2177 }
2178 Py_DECREF(position);
2179
2180 PyErr_SetObject(elementtree_parseerror_obj, error);
2181 Py_DECREF(error);
2182}
2183
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002184/* -------------------------------------------------------------------- */
2185/* handlers */
2186
2187static void
2188expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2189 int data_len)
2190{
2191 PyObject* key;
2192 PyObject* value;
2193 PyObject* res;
2194
2195 if (data_len < 2 || data_in[0] != '&')
2196 return;
2197
2198 key = makestring(data_in + 1, data_len - 2);
2199 if (!key)
2200 return;
2201
2202 value = PyDict_GetItem(self->entity, key);
2203
2204 if (value) {
2205 if (TreeBuilder_CheckExact(self->target))
2206 res = treebuilder_handle_data(
2207 (TreeBuilderObject*) self->target, value
2208 );
2209 else if (self->handle_data)
2210 res = PyObject_CallFunction(self->handle_data, "O", value);
2211 else
2212 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002213 Py_XDECREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002214 } else if (!PyErr_Occurred()) {
2215 /* Report the first error, not the last */
2216 char message[128];
2217 sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key));
2218 expat_set_error(
2219 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002220 EXPAT(GetErrorLineNumber)(self->parser),
2221 EXPAT(GetErrorColumnNumber)(self->parser)
2222 );
2223 }
2224
2225 Py_DECREF(key);
2226}
2227
2228static void
2229expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2230 const XML_Char **attrib_in)
2231{
2232 PyObject* res;
2233 PyObject* tag;
2234 PyObject* attrib;
2235 int ok;
2236
2237 /* tag name */
2238 tag = makeuniversal(self, tag_in);
2239 if (!tag)
2240 return; /* parser will look for errors */
2241
2242 /* attributes */
2243 if (attrib_in[0]) {
2244 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002245 if (!attrib) {
2246 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002247 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002248 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002249 while (attrib_in[0] && attrib_in[1]) {
2250 PyObject* key = makeuniversal(self, attrib_in[0]);
2251 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2252 if (!key || !value) {
2253 Py_XDECREF(value);
2254 Py_XDECREF(key);
2255 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002256 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002257 return;
2258 }
2259 ok = PyDict_SetItem(attrib, key, value);
2260 Py_DECREF(value);
2261 Py_DECREF(key);
2262 if (ok < 0) {
2263 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002264 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002265 return;
2266 }
2267 attrib_in += 2;
2268 }
2269 } else {
2270 Py_INCREF(Py_None);
2271 attrib = Py_None;
2272 }
2273
2274 if (TreeBuilder_CheckExact(self->target))
2275 /* shortcut */
2276 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2277 tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002278 else if (self->handle_start) {
2279 if (attrib == Py_None) {
2280 Py_DECREF(attrib);
2281 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002282 if (!attrib) {
2283 Py_DECREF(tag);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002284 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002285 }
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002286 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002287 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002288 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002289 res = NULL;
2290
2291 Py_DECREF(tag);
2292 Py_DECREF(attrib);
2293
2294 Py_XDECREF(res);
2295}
2296
2297static void
2298expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2299 int data_len)
2300{
2301 PyObject* data;
2302 PyObject* res;
2303
2304 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002305 if (!data)
2306 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002307
2308 if (TreeBuilder_CheckExact(self->target))
2309 /* shortcut */
2310 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2311 else if (self->handle_data)
2312 res = PyObject_CallFunction(self->handle_data, "O", data);
2313 else
2314 res = NULL;
2315
2316 Py_DECREF(data);
2317
2318 Py_XDECREF(res);
2319}
2320
2321static void
2322expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2323{
2324 PyObject* tag;
2325 PyObject* res = NULL;
2326
2327 if (TreeBuilder_CheckExact(self->target))
2328 /* shortcut */
2329 /* the standard tree builder doesn't look at the end tag */
2330 res = treebuilder_handle_end(
2331 (TreeBuilderObject*) self->target, Py_None
2332 );
2333 else if (self->handle_end) {
2334 tag = makeuniversal(self, tag_in);
2335 if (tag) {
2336 res = PyObject_CallFunction(self->handle_end, "O", tag);
2337 Py_DECREF(tag);
2338 }
2339 }
2340
2341 Py_XDECREF(res);
2342}
2343
2344static void
2345expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2346 const XML_Char *uri)
2347{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002348 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2349 PyObject *parcel;
2350 PyObject *sprefix = NULL;
2351 PyObject *suri = NULL;
2352
2353 if (PyErr_Occurred())
2354 return;
2355
2356 if (!target->events || !target->start_ns_event_obj)
2357 return;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002358
Eli Benderskyf933e082013-11-28 06:25:45 -08002359 if (uri)
Eli Bendersky71142c42013-11-28 06:37:25 -08002360 suri = makestring(uri, strlen(uri));
Eli Benderskyf933e082013-11-28 06:25:45 -08002361 else
Eli Bendersky71142c42013-11-28 06:37:25 -08002362 suri = PyString_FromStringAndSize("", 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002363 if (!suri)
2364 return;
2365
2366 if (prefix)
2367 sprefix = makestring(prefix, strlen(prefix));
2368 else
2369 sprefix = PyString_FromStringAndSize("", 0);
2370 if (!sprefix) {
2371 Py_DECREF(suri);
2372 return;
2373 }
2374
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002375 parcel = PyTuple_Pack(2, sprefix, suri);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002376 Py_DECREF(sprefix);
2377 Py_DECREF(suri);
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002378 if (!parcel)
2379 return;
2380 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
2381 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002382}
2383
2384static void
2385expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2386{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002387 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2388
2389 if (PyErr_Occurred())
2390 return;
2391
2392 if (!target->events)
2393 return;
2394
2395 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002396}
2397
2398static void
2399expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2400{
2401 PyObject* comment;
2402 PyObject* res;
2403
2404 if (self->handle_comment) {
2405 comment = makestring(comment_in, strlen(comment_in));
2406 if (comment) {
2407 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2408 Py_XDECREF(res);
2409 Py_DECREF(comment);
2410 }
2411 }
2412}
2413
2414static void
2415expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2416 const XML_Char* data_in)
2417{
2418 PyObject* target;
2419 PyObject* data;
2420 PyObject* res;
2421
2422 if (self->handle_pi) {
2423 target = makestring(target_in, strlen(target_in));
2424 data = makestring(data_in, strlen(data_in));
2425 if (target && data) {
2426 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2427 Py_XDECREF(res);
2428 Py_DECREF(data);
2429 Py_DECREF(target);
2430 } else {
2431 Py_XDECREF(data);
2432 Py_XDECREF(target);
2433 }
2434 }
2435}
2436
2437#if defined(Py_USING_UNICODE)
2438static int
2439expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2440 XML_Encoding *info)
2441{
2442 PyObject* u;
2443 Py_UNICODE* p;
2444 unsigned char s[256];
2445 int i;
2446
2447 memset(info, 0, sizeof(XML_Encoding));
2448
2449 for (i = 0; i < 256; i++)
2450 s[i] = i;
2451
Fredrik Lundhc3389992005-12-25 11:40:19 +00002452 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002453 if (!u)
2454 return XML_STATUS_ERROR;
2455
2456 if (PyUnicode_GET_SIZE(u) != 256) {
2457 Py_DECREF(u);
Eli Benderskyb6717012013-08-04 06:09:49 -07002458 PyErr_SetString(PyExc_ValueError,
2459 "multi-byte encodings are not supported");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002460 return XML_STATUS_ERROR;
2461 }
2462
2463 p = PyUnicode_AS_UNICODE(u);
2464
2465 for (i = 0; i < 256; i++) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002466 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2467 info->map[i] = p[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002468 else
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002469 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002470 }
2471
2472 Py_DECREF(u);
2473
2474 return XML_STATUS_OK;
2475}
2476#endif
2477
2478/* -------------------------------------------------------------------- */
2479/* constructor and destructor */
2480
2481static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00002482xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002483{
2484 XMLParserObject* self;
2485 /* FIXME: does this need to be static? */
2486 static XML_Memory_Handling_Suite memory_handler;
2487
2488 PyObject* target = NULL;
2489 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002490 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002491 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2492 &target, &encoding))
2493 return NULL;
2494
2495#if defined(USE_PYEXPAT_CAPI)
2496 if (!expat_capi) {
2497 PyErr_SetString(
2498 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2499 );
2500 return NULL;
2501 }
2502#endif
2503
2504 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2505 if (self == NULL)
2506 return NULL;
2507
2508 self->entity = PyDict_New();
2509 if (!self->entity) {
2510 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002511 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002512 }
2513
2514 self->names = PyDict_New();
2515 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002516 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002517 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002518 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519 }
2520
2521 memory_handler.malloc_fcn = PyObject_Malloc;
2522 memory_handler.realloc_fcn = PyObject_Realloc;
2523 memory_handler.free_fcn = PyObject_Free;
2524
2525 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2526 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002527 PyObject_Del(self->names);
2528 PyObject_Del(self->entity);
2529 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002530 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002531 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002532 }
2533
2534 /* setup target handlers */
2535 if (!target) {
2536 target = treebuilder_new();
2537 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002538 EXPAT(ParserFree)(self->parser);
2539 PyObject_Del(self->names);
2540 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002541 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002542 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002543 }
2544 } else
2545 Py_INCREF(target);
2546 self->target = target;
2547
2548 self->handle_xml = PyObject_GetAttrString(target, "xml");
2549 self->handle_start = PyObject_GetAttrString(target, "start");
2550 self->handle_data = PyObject_GetAttrString(target, "data");
2551 self->handle_end = PyObject_GetAttrString(target, "end");
2552 self->handle_comment = PyObject_GetAttrString(target, "comment");
2553 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002554 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002555
2556 PyErr_Clear();
2557
2558 /* configure parser */
2559 EXPAT(SetUserData)(self->parser, self);
2560 EXPAT(SetElementHandler)(
2561 self->parser,
2562 (XML_StartElementHandler) expat_start_handler,
2563 (XML_EndElementHandler) expat_end_handler
2564 );
2565 EXPAT(SetDefaultHandlerExpand)(
2566 self->parser,
2567 (XML_DefaultHandler) expat_default_handler
2568 );
2569 EXPAT(SetCharacterDataHandler)(
2570 self->parser,
2571 (XML_CharacterDataHandler) expat_data_handler
2572 );
2573 if (self->handle_comment)
2574 EXPAT(SetCommentHandler)(
2575 self->parser,
2576 (XML_CommentHandler) expat_comment_handler
2577 );
2578 if (self->handle_pi)
2579 EXPAT(SetProcessingInstructionHandler)(
2580 self->parser,
2581 (XML_ProcessingInstructionHandler) expat_pi_handler
2582 );
2583#if defined(Py_USING_UNICODE)
2584 EXPAT(SetUnknownEncodingHandler)(
2585 self->parser,
2586 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2587 );
2588#endif
2589
2590 ALLOC(sizeof(XMLParserObject), "create expatparser");
2591
2592 return (PyObject*) self;
2593}
2594
2595static void
2596xmlparser_dealloc(XMLParserObject* self)
2597{
2598 EXPAT(ParserFree)(self->parser);
2599
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002600 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002601 Py_XDECREF(self->handle_pi);
2602 Py_XDECREF(self->handle_comment);
2603 Py_XDECREF(self->handle_end);
2604 Py_XDECREF(self->handle_data);
2605 Py_XDECREF(self->handle_start);
2606 Py_XDECREF(self->handle_xml);
2607
2608 Py_DECREF(self->target);
2609 Py_DECREF(self->entity);
2610 Py_DECREF(self->names);
2611
2612 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2613
2614 PyObject_Del(self);
2615}
2616
2617/* -------------------------------------------------------------------- */
2618/* methods (in alphabetical order) */
2619
2620LOCAL(PyObject*)
2621expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2622{
2623 int ok;
2624
2625 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2626
2627 if (PyErr_Occurred())
2628 return NULL;
2629
2630 if (!ok) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002631 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002632 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2633 EXPAT(GetErrorLineNumber)(self->parser),
2634 EXPAT(GetErrorColumnNumber)(self->parser)
2635 );
2636 return NULL;
2637 }
2638
2639 Py_RETURN_NONE;
2640}
2641
2642static PyObject*
2643xmlparser_close(XMLParserObject* self, PyObject* args)
2644{
2645 /* end feeding data to parser */
2646
2647 PyObject* res;
2648 if (!PyArg_ParseTuple(args, ":close"))
2649 return NULL;
2650
2651 res = expat_parse(self, "", 0, 1);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002652 if (!res)
2653 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002654
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002655 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002656 Py_DECREF(res);
2657 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002658 } if (self->handle_close) {
2659 Py_DECREF(res);
2660 return PyObject_CallFunction(self->handle_close, "");
2661 } else
2662 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663}
2664
2665static PyObject*
2666xmlparser_feed(XMLParserObject* self, PyObject* args)
2667{
2668 /* feed data to parser */
2669
2670 char* data;
2671 int data_len;
2672 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2673 return NULL;
2674
2675 return expat_parse(self, data, data_len, 0);
2676}
2677
2678static PyObject*
2679xmlparser_parse(XMLParserObject* self, PyObject* args)
2680{
2681 /* (internal) parse until end of input stream */
2682
2683 PyObject* reader;
2684 PyObject* buffer;
2685 PyObject* res;
2686
2687 PyObject* fileobj;
2688 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2689 return NULL;
2690
2691 reader = PyObject_GetAttrString(fileobj, "read");
2692 if (!reader)
2693 return NULL;
2694
2695 /* read from open file object */
2696 for (;;) {
2697
2698 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2699
2700 if (!buffer) {
2701 /* read failed (e.g. due to KeyboardInterrupt) */
2702 Py_DECREF(reader);
2703 return NULL;
2704 }
2705
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002706 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707 Py_DECREF(buffer);
2708 break;
2709 }
2710
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002711 if (PyString_GET_SIZE(buffer) > INT_MAX) {
2712 Py_DECREF(buffer);
2713 Py_DECREF(reader);
2714 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
2715 return NULL;
2716 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002717 res = expat_parse(
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002718 self, PyString_AS_STRING(buffer), (int)PyString_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002719 );
2720
2721 Py_DECREF(buffer);
2722
2723 if (!res) {
2724 Py_DECREF(reader);
2725 return NULL;
2726 }
2727 Py_DECREF(res);
2728
2729 }
2730
2731 Py_DECREF(reader);
2732
2733 res = expat_parse(self, "", 0, 1);
2734
2735 if (res && TreeBuilder_CheckExact(self->target)) {
2736 Py_DECREF(res);
2737 return treebuilder_done((TreeBuilderObject*) self->target);
2738 }
2739
2740 return res;
2741}
2742
2743static PyObject*
2744xmlparser_setevents(XMLParserObject* self, PyObject* args)
2745{
2746 /* activate element event reporting */
2747
Neal Norwitzc7074382006-06-12 02:06:17 +00002748 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002749 TreeBuilderObject* target;
2750
2751 PyObject* events; /* event collector */
2752 PyObject* event_set = Py_None;
2753 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2754 &event_set))
2755 return NULL;
2756
2757 if (!TreeBuilder_CheckExact(self->target)) {
2758 PyErr_SetString(
2759 PyExc_TypeError,
2760 "event handling only supported for cElementTree.Treebuilder "
2761 "targets"
2762 );
2763 return NULL;
2764 }
2765
2766 target = (TreeBuilderObject*) self->target;
2767
2768 Py_INCREF(events);
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002769 Py_XSETREF(target->events, events);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002770
2771 /* clear out existing events */
Serhiy Storchaka98a97222014-02-09 13:14:04 +02002772 Py_CLEAR(target->start_event_obj);
2773 Py_CLEAR(target->end_event_obj);
2774 Py_CLEAR(target->start_ns_event_obj);
2775 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002776
2777 if (event_set == Py_None) {
2778 /* default is "end" only */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002779 target->end_event_obj = PyString_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002780 Py_RETURN_NONE;
2781 }
2782
2783 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2784 goto error;
2785
2786 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2787 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2788 char* event;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002789 if (!PyString_Check(item))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002790 goto error;
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002791 Py_INCREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002792 event = PyString_AS_STRING(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002793 if (strcmp(event, "start") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002794 Py_XSETREF(target->start_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002795 } else if (strcmp(event, "end") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002796 Py_XSETREF(target->end_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002797 } else if (strcmp(event, "start-ns") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002798 Py_XSETREF(target->start_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002799 EXPAT(SetNamespaceDeclHandler)(
2800 self->parser,
2801 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2802 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2803 );
2804 } else if (strcmp(event, "end-ns") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002805 Py_XSETREF(target->end_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002806 EXPAT(SetNamespaceDeclHandler)(
2807 self->parser,
2808 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2809 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2810 );
2811 } else {
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002812 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002813 PyErr_Format(
2814 PyExc_ValueError,
2815 "unknown event '%s'", event
2816 );
2817 return NULL;
2818 }
2819 }
2820
2821 Py_RETURN_NONE;
2822
2823 error:
2824 PyErr_SetString(
2825 PyExc_TypeError,
2826 "invalid event tuple"
2827 );
2828 return NULL;
2829}
2830
2831static PyMethodDef xmlparser_methods[] = {
2832 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2833 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2834 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2835 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2836 {NULL, NULL}
2837};
2838
2839static PyObject*
2840xmlparser_getattr(XMLParserObject* self, char* name)
2841{
2842 PyObject* res;
2843
2844 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2845 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002846 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002847
2848 PyErr_Clear();
2849
2850 if (strcmp(name, "entity") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002851 res = self->entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002852 else if (strcmp(name, "target") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002853 res = self->target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002854 else if (strcmp(name, "version") == 0) {
2855 char buffer[100];
2856 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2857 XML_MINOR_VERSION, XML_MICRO_VERSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002858 return PyString_FromString(buffer);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002859 } else {
2860 PyErr_SetString(PyExc_AttributeError, name);
2861 return NULL;
2862 }
2863
2864 Py_INCREF(res);
2865 return res;
2866}
2867
2868statichere PyTypeObject XMLParser_Type = {
2869 PyObject_HEAD_INIT(NULL)
2870 0, "XMLParser", sizeof(XMLParserObject), 0,
2871 /* methods */
2872 (destructor)xmlparser_dealloc, /* tp_dealloc */
2873 0, /* tp_print */
2874 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2875};
2876
2877#endif
2878
2879/* ==================================================================== */
2880/* python module interface */
2881
2882static PyMethodDef _functions[] = {
2883 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2884 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2885 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2886#if defined(USE_EXPAT)
2887 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2888 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2889#endif
2890 {NULL, NULL}
2891};
2892
2893DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002894init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002895{
2896 PyObject* m;
2897 PyObject* g;
2898 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002899
2900 /* Patch object type */
Christian Heimese93237d2007-12-19 02:37:44 +00002901 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002902#if defined(USE_EXPAT)
Christian Heimese93237d2007-12-19 02:37:44 +00002903 Py_TYPE(&XMLParser_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002904#endif
2905
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002906 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002907 if (!m)
2908 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002909
2910 /* python glue code */
2911
2912 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002913 if (!g)
2914 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002915
2916 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2917
2918 bootstrap = (
2919
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002920 "from copy import copy, deepcopy\n"
2921
2922 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002923 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002924 "except ImportError:\n"
2925 " import ElementTree\n"
2926 "ET = ElementTree\n"
2927 "del ElementTree\n"
2928
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002929 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002930
2931 "try:\n" /* check if copy works as is */
2932 " copy(cElementTree.Element('x'))\n"
2933 "except:\n"
2934 " def copyelement(elem):\n"
2935 " return elem\n"
2936
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002937 "class CommentProxy:\n"
2938 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002939 " element = cElementTree.Element(ET.Comment)\n"
2940 " element.text = text\n"
2941 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002942 " def __cmp__(self, other):\n"
2943 " return cmp(ET.Comment, other)\n"
2944 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002945
2946 "class ElementTree(ET.ElementTree):\n" /* public */
2947 " def parse(self, source, parser=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002948 " close_source = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002949 " if not hasattr(source, 'read'):\n"
2950 " source = open(source, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002951 " close_source = False\n"
2952 " try:\n"
2953 " if parser is not None:\n"
2954 " while 1:\n"
2955 " data = source.read(65536)\n"
2956 " if not data:\n"
2957 " break\n"
2958 " parser.feed(data)\n"
2959 " self._root = parser.close()\n"
2960 " else:\n"
2961 " parser = cElementTree.XMLParser()\n"
2962 " self._root = parser._parse(source)\n"
2963 " return self._root\n"
2964 " finally:\n"
2965 " if close_source:\n"
2966 " source.close()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002967 "cElementTree.ElementTree = ElementTree\n"
2968
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002969 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970 " if tag == '*':\n"
2971 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002972 " if tag is None or node.tag == tag:\n"
2973 " yield node\n"
2974 " for node in node:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002975 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976 " yield node\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002977
2978 "def itertext(node):\n" /* helper */
2979 " if node.text:\n"
2980 " yield node.text\n"
2981 " for e in node:\n"
2982 " for s in e.itertext():\n"
2983 " yield s\n"
2984 " if e.tail:\n"
2985 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002986
2987 "def parse(source, parser=None):\n" /* public */
2988 " tree = ElementTree()\n"
2989 " tree.parse(source, parser)\n"
2990 " return tree\n"
2991 "cElementTree.parse = parse\n"
2992
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002993 "class iterparse(object):\n"
2994 " root = None\n"
2995 " def __init__(self, file, events=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002996 " self._close_file = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002997 " if not hasattr(file, 'read'):\n"
2998 " file = open(file, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002999 " self._close_file = True\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003000 " self._file = file\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003001 " self._events = []\n"
3002 " self._index = 0\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003003 " self._error = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003004 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003005 " b = cElementTree.TreeBuilder()\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003006 " self._parser = cElementTree.XMLParser(b)\n"
3007 " self._parser._setevents(self._events, events)\n"
3008 " def next(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003009 " while 1:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003010 " try:\n"
3011 " item = self._events[self._index]\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003012 " self._index += 1\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003013 " return item\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003014 " except IndexError:\n"
3015 " pass\n"
3016 " if self._error:\n"
3017 " e = self._error\n"
3018 " self._error = None\n"
3019 " raise e\n"
3020 " if self._parser is None:\n"
3021 " self.root = self._root\n"
3022 " if self._close_file:\n"
3023 " self._file.close()\n"
3024 " raise StopIteration\n"
3025 " # load event buffer\n"
3026 " del self._events[:]\n"
3027 " self._index = 0\n"
3028 " data = self._file.read(16384)\n"
3029 " if data:\n"
3030 " try:\n"
3031 " self._parser.feed(data)\n"
3032 " except SyntaxError as exc:\n"
3033 " self._error = exc\n"
3034 " else:\n"
3035 " self._root = self._parser.close()\n"
3036 " self._parser = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003037 " def __iter__(self):\n"
3038 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003040
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003041 "class PIProxy:\n"
3042 " def __call__(self, target, text=None):\n"
3043 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003044 " element.text = target\n"
3045 " if text:\n"
3046 " element.text = element.text + ' ' + text\n"
3047 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003048 " def __cmp__(self, other):\n"
3049 " return cmp(ET.PI, other)\n"
3050 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003051
3052 "def XML(text):\n" /* public */
3053 " parser = cElementTree.XMLParser()\n"
3054 " parser.feed(text)\n"
3055 " return parser.close()\n"
3056 "cElementTree.XML = cElementTree.fromstring = XML\n"
3057
3058 "def XMLID(text):\n" /* public */
3059 " tree = XML(text)\n"
3060 " ids = {}\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003061 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003062 " id = elem.get('id')\n"
3063 " if id:\n"
3064 " ids[id] = elem\n"
3065 " return tree, ids\n"
3066 "cElementTree.XMLID = XMLID\n"
3067
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003068 "try:\n"
3069 " register_namespace = ET.register_namespace\n"
3070 "except AttributeError:\n"
3071 " def register_namespace(prefix, uri):\n"
3072 " ET._namespace_map[uri] = prefix\n"
3073 "cElementTree.register_namespace = register_namespace\n"
3074
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003075 "cElementTree.dump = ET.dump\n"
3076 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3077 "cElementTree.iselement = ET.iselement\n"
3078 "cElementTree.QName = ET.QName\n"
3079 "cElementTree.tostring = ET.tostring\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003080 "cElementTree.fromstringlist = ET.fromstringlist\n"
3081 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082 "cElementTree.VERSION = '" VERSION "'\n"
3083 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003084
3085 );
3086
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003087 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3088 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003089
3090 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3091
3092 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3093 if (elementtree_copyelement_obj) {
3094 /* reduce hack needed; enable reduce method */
3095 PyMethodDef* mp;
3096 for (mp = element_methods; mp->ml_name; mp++)
3097 if (mp->ml_meth == (PyCFunction) element_reduce) {
3098 mp->ml_name = "__reduce__";
3099 break;
3100 }
3101 } else
3102 PyErr_Clear();
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003103
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003104 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003105 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3106 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003107
3108#if defined(USE_PYEXPAT_CAPI)
3109 /* link against pyexpat, if possible */
Larry Hastings402b73f2010-03-25 00:54:54 +00003110 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003111 if (expat_capi) {
3112 /* check that it's usable */
3113 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3114 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3115 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3116 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3117 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3118 expat_capi = NULL;
3119 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003120#endif
3121
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003122 elementtree_parseerror_obj = PyErr_NewException(
3123 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3124 );
3125 Py_INCREF(elementtree_parseerror_obj);
3126 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003127}