blob: 929616f3e2e5b7dbcc3c97b5cfc9d7ed490af5a0 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xicluna3e8c1892010-03-11 14:36:19 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xicluna3e8c1892010-03-11 14:36:19 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xicluna3e8c1892010-03-11 14:36:19 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xicluna3e8c1892010-03-11 14:36:19 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
Fredrik Lundhdc075b92006-08-16 16:47:07 +000052#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000053
54/* -------------------------------------------------------------------- */
55/* configuration */
56
57/* Leave defined to include the expat-based XMLParser type */
58#define USE_EXPAT
59
Florent Xicluna3e8c1892010-03-11 14:36:19 +000060/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* #define USE_PYEXPAT_CAPI */
62
63/* An element can hold this many children without extra memory
64 allocations. */
65#define STATIC_CHILDREN 4
66
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
73 eight bytes. For the current version of cElementTree, this means
74 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
79#if 0
80static int memory = 0;
81#define ALLOC(size, comment)\
82do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
83#define RELEASE(size, comment)\
84do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
85#else
86#define ALLOC(size, comment)
87#define RELEASE(size, comment)
88#endif
89
90/* compiler tweaks */
91#if defined(_MSC_VER)
92#define LOCAL(type) static __inline type __fastcall
93#else
94#define LOCAL(type) static type
95#endif
96
97/* compatibility macros */
Florent Xicluna3e8c1892010-03-11 14:36:19 +000098#if (PY_VERSION_HEX < 0x02060000)
99#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
100#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
101#endif
102
Martin v. Löwis18e16552006-02-15 17:27:45 +0000103#if (PY_VERSION_HEX < 0x02050000)
104typedef int Py_ssize_t;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000105#define lenfunc inquiry
Martin v. Löwis18e16552006-02-15 17:27:45 +0000106#endif
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000107
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000108#if (PY_VERSION_HEX < 0x02040000)
109#define PyDict_CheckExact PyDict_Check
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000110
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000111#if !defined(Py_RETURN_NONE)
112#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
113#endif
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000114#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000115
/* macros used to store 'join' flags in string object pointers. note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ. see comments in the ElementObject definition for more
   info.

   JOIN_GET(p)        yields the flag kept in the lowest bit of p
   JOIN_SET(p, flag)  yields p's object pointer with 'flag' or-ed in
   JOIN_OBJ(p)        yields p with the lowest bit cleared, as PyObject* */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
123
124/* glue functions (see the init function for details) */
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000125static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000128static PyObject* elementtree_iter_obj;
129static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130static PyObject* elementpath_obj;
131
132/* helpers */
133
134LOCAL(PyObject*)
135deepcopy(PyObject* object, PyObject* memo)
136{
137 /* do a deep copy of the given object */
138
139 PyObject* args;
140 PyObject* result;
141
142 if (!elementtree_deepcopy_obj) {
143 PyErr_SetString(
144 PyExc_RuntimeError,
145 "deepcopy helper not found"
146 );
147 return NULL;
148 }
149
150 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000151 if (!args)
152 return NULL;
153
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
155 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156
157 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158
159 Py_DECREF(args);
160
161 return result;
162}
163
164LOCAL(PyObject*)
165list_join(PyObject* list)
166{
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300167 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168 PyObject* joiner;
169 PyObject* function;
170 PyObject* args;
171 PyObject* result;
172
173 switch (PyList_GET_SIZE(list)) {
174 case 0:
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000175 return PyString_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000176 case 1:
177 result = PyList_GET_ITEM(list, 0);
178 Py_INCREF(result);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179 return result;
180 }
181
182 /* two or more elements: slice out a suitable separator from the
183 first member, and use that to join the entire list */
184
185 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
186 if (!joiner)
187 return NULL;
188
189 function = PyObject_GetAttrString(joiner, "join");
190 if (!function) {
191 Py_DECREF(joiner);
192 return NULL;
193 }
194
195 args = PyTuple_New(1);
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300196 if (!args) {
197 Py_DECREF(function);
198 Py_DECREF(joiner);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000199 return NULL;
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300200 }
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000201
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300202 Py_INCREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203 PyTuple_SET_ITEM(args, 0, list);
204
205 result = PyObject_CallObject(function, args);
206
207 Py_DECREF(args); /* also removes list */
208 Py_DECREF(function);
209 Py_DECREF(joiner);
210
211 return result;
212}
213
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000214/* -------------------------------------------------------------------- */
215/* the element type */
216
/* optional part of an element: its attributes and its child list.
   kept in a separately allocated block that ElementObject.extra
   points to. */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    int length; /* actual number of items */
    int allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline storage for the first STATIC_CHILDREN children */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
232
/* the element object itself: tag, text and tail, plus an optional
   pointer to the attribute/children section. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child. note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer. the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent. note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children; NULL when no extra section exists */
    ElementObjectExtra* extra;

} ElementObject;
254
255staticforward PyTypeObject Element_Type;
256
Christian Heimese93237d2007-12-19 02:37:44 +0000257#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258
259/* -------------------------------------------------------------------- */
260/* element constructor and destructor */
261
262LOCAL(int)
263element_new_extra(ElementObject* self, PyObject* attrib)
264{
265 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
266 if (!self->extra)
267 return -1;
268
269 if (!attrib)
270 attrib = Py_None;
271
272 Py_INCREF(attrib);
273 self->extra->attrib = attrib;
274
275 self->extra->length = 0;
276 self->extra->allocated = STATIC_CHILDREN;
277 self->extra->children = self->extra->_children;
278
279 return 0;
280}
281
282LOCAL(void)
283element_dealloc_extra(ElementObject* self)
284{
285 int i;
286
287 Py_DECREF(self->extra->attrib);
288
289 for (i = 0; i < self->extra->length; i++)
290 Py_DECREF(self->extra->children[i]);
291
292 if (self->extra->children != self->extra->_children)
293 PyObject_Free(self->extra->children);
294
295 PyObject_Free(self->extra);
296}
297
298LOCAL(PyObject*)
299element_new(PyObject* tag, PyObject* attrib)
300{
301 ElementObject* self;
302
303 self = PyObject_New(ElementObject, &Element_Type);
304 if (self == NULL)
305 return NULL;
306
307 /* use None for empty dictionaries */
308 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
309 attrib = Py_None;
310
311 self->extra = NULL;
312
313 if (attrib != Py_None) {
314
Neal Norwitzc6a989a2006-05-10 06:57:58 +0000315 if (element_new_extra(self, attrib) < 0) {
316 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000317 return NULL;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000318 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000319
320 self->extra->length = 0;
321 self->extra->allocated = STATIC_CHILDREN;
322 self->extra->children = self->extra->_children;
323
324 }
325
326 Py_INCREF(tag);
327 self->tag = tag;
328
329 Py_INCREF(Py_None);
330 self->text = Py_None;
331
332 Py_INCREF(Py_None);
333 self->tail = Py_None;
334
335 ALLOC(sizeof(ElementObject), "create element");
336
337 return (PyObject*) self;
338}
339
340LOCAL(int)
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200341element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000342{
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200343 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000344 PyObject* *children;
345
346 /* make sure self->children can hold the given number of extra
347 elements. set an exception and return -1 if allocation failed */
348
349 if (!self->extra)
350 element_new_extra(self, NULL);
351
352 size = self->extra->length + extra;
353
354 if (size > self->extra->allocated) {
355 /* use Python 2.4's list growth strategy */
356 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000357 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
358 * which needs at least 4 bytes.
359 * Although it's a false alarm always assume at least one child to
360 * be safe.
361 */
362 size = size ? size : 1;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200363 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
364 goto nomemory;
365 if (size > INT_MAX) {
366 PyErr_SetString(PyExc_OverflowError,
367 "too many children");
368 return -1;
369 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000370 if (self->extra->children != self->extra->_children) {
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000371 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
372 * "children", which needs at least 4 bytes. Although it's a
373 * false alarm always assume at least one child to be safe.
374 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000375 children = PyObject_Realloc(self->extra->children,
376 size * sizeof(PyObject*));
377 if (!children)
378 goto nomemory;
379 } else {
380 children = PyObject_Malloc(size * sizeof(PyObject*));
381 if (!children)
382 goto nomemory;
383 /* copy existing children from static area to malloc buffer */
384 memcpy(children, self->extra->children,
385 self->extra->length * sizeof(PyObject*));
386 }
387 self->extra->children = children;
388 self->extra->allocated = size;
389 }
390
391 return 0;
392
393 nomemory:
394 PyErr_NoMemory();
395 return -1;
396}
397
398LOCAL(int)
399element_add_subelement(ElementObject* self, PyObject* element)
400{
401 /* add a child element to a parent */
402
403 if (element_resize(self, 1) < 0)
404 return -1;
405
406 Py_INCREF(element);
407 self->extra->children[self->extra->length] = element;
408
409 self->extra->length++;
410
411 return 0;
412}
413
414LOCAL(PyObject*)
415element_get_attrib(ElementObject* self)
416{
417 /* return borrowed reference to attrib dictionary */
418 /* note: this function assumes that the extra section exists */
419
420 PyObject* res = self->extra->attrib;
421
422 if (res == Py_None) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000423 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000424 /* create missing dictionary */
425 res = PyDict_New();
426 if (!res)
427 return NULL;
428 self->extra->attrib = res;
429 }
430
431 return res;
432}
433
434LOCAL(PyObject*)
435element_get_text(ElementObject* self)
436{
437 /* return borrowed reference to text attribute */
438
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300439 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440
441 if (JOIN_GET(res)) {
442 res = JOIN_OBJ(res);
443 if (PyList_CheckExact(res)) {
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300444 PyObject *tmp = list_join(res);
445 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000446 return NULL;
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300447 self->text = tmp;
448 Py_DECREF(res);
449 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000450 }
451 }
452
453 return res;
454}
455
456LOCAL(PyObject*)
457element_get_tail(ElementObject* self)
458{
459 /* return borrowed reference to text attribute */
460
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300461 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000462
463 if (JOIN_GET(res)) {
464 res = JOIN_OBJ(res);
465 if (PyList_CheckExact(res)) {
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300466 PyObject *tmp = list_join(res);
467 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 return NULL;
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300469 self->tail = tmp;
470 Py_DECREF(res);
471 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000472 }
473 }
474
475 return res;
476}
477
478static PyObject*
479element(PyObject* self, PyObject* args, PyObject* kw)
480{
481 PyObject* elem;
482
483 PyObject* tag;
484 PyObject* attrib = NULL;
485 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
486 &PyDict_Type, &attrib))
487 return NULL;
488
489 if (attrib || kw) {
490 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
491 if (!attrib)
492 return NULL;
493 if (kw)
494 PyDict_Update(attrib, kw);
495 } else {
496 Py_INCREF(Py_None);
497 attrib = Py_None;
498 }
499
500 elem = element_new(tag, attrib);
501
502 Py_DECREF(attrib);
503
504 return elem;
505}
506
507static PyObject*
508subelement(PyObject* self, PyObject* args, PyObject* kw)
509{
510 PyObject* elem;
511
512 ElementObject* parent;
513 PyObject* tag;
514 PyObject* attrib = NULL;
515 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
516 &Element_Type, &parent, &tag,
517 &PyDict_Type, &attrib))
518 return NULL;
519
520 if (attrib || kw) {
521 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
522 if (!attrib)
523 return NULL;
524 if (kw)
525 PyDict_Update(attrib, kw);
526 } else {
527 Py_INCREF(Py_None);
528 attrib = Py_None;
529 }
530
531 elem = element_new(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000532 Py_DECREF(attrib);
Xiang Zhang9c0408d2017-03-22 14:32:52 +0800533 if (elem == NULL)
534 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000535
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000536 if (element_add_subelement(parent, elem) < 0) {
537 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000538 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000539 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000540
541 return elem;
542}
543
544static void
545element_dealloc(ElementObject* self)
546{
Serhiy Storchaka14518742016-12-28 09:23:17 +0200547 if (self->extra)
548 element_dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000549
550 /* discard attributes */
551 Py_DECREF(self->tag);
Serhiy Storchaka14518742016-12-28 09:23:17 +0200552 Py_DECREF(JOIN_OBJ(self->text));
553 Py_DECREF(JOIN_OBJ(self->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000554
555 RELEASE(sizeof(ElementObject), "destroy element");
556
557 PyObject_Del(self);
558}
559
560/* -------------------------------------------------------------------- */
561/* methods (in alphabetical order) */
562
563static PyObject*
564element_append(ElementObject* self, PyObject* args)
565{
566 PyObject* element;
567 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
568 return NULL;
569
570 if (element_add_subelement(self, element) < 0)
571 return NULL;
572
573 Py_RETURN_NONE;
574}
575
576static PyObject*
577element_clear(ElementObject* self, PyObject* args)
578{
579 if (!PyArg_ParseTuple(args, ":clear"))
580 return NULL;
581
582 if (self->extra) {
583 element_dealloc_extra(self);
584 self->extra = NULL;
585 }
586
587 Py_INCREF(Py_None);
588 Py_DECREF(JOIN_OBJ(self->text));
589 self->text = Py_None;
590
591 Py_INCREF(Py_None);
592 Py_DECREF(JOIN_OBJ(self->tail));
593 self->tail = Py_None;
594
595 Py_RETURN_NONE;
596}
597
598static PyObject*
599element_copy(ElementObject* self, PyObject* args)
600{
601 int i;
602 ElementObject* element;
603
604 if (!PyArg_ParseTuple(args, ":__copy__"))
605 return NULL;
606
607 element = (ElementObject*) element_new(
608 self->tag, (self->extra) ? self->extra->attrib : Py_None
609 );
610 if (!element)
611 return NULL;
612
613 Py_DECREF(JOIN_OBJ(element->text));
614 element->text = self->text;
615 Py_INCREF(JOIN_OBJ(element->text));
616
617 Py_DECREF(JOIN_OBJ(element->tail));
618 element->tail = self->tail;
619 Py_INCREF(JOIN_OBJ(element->tail));
620
621 if (self->extra) {
622
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000623 if (element_resize(element, self->extra->length) < 0) {
624 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000626 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627
628 for (i = 0; i < self->extra->length; i++) {
629 Py_INCREF(self->extra->children[i]);
630 element->extra->children[i] = self->extra->children[i];
631 }
632
633 element->extra->length = self->extra->length;
634
635 }
636
637 return (PyObject*) element;
638}
639
640static PyObject*
641element_deepcopy(ElementObject* self, PyObject* args)
642{
643 int i;
644 ElementObject* element;
645 PyObject* tag;
646 PyObject* attrib;
647 PyObject* text;
648 PyObject* tail;
649 PyObject* id;
650
651 PyObject* memo;
652 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
653 return NULL;
654
655 tag = deepcopy(self->tag, memo);
656 if (!tag)
657 return NULL;
658
659 if (self->extra) {
660 attrib = deepcopy(self->extra->attrib, memo);
661 if (!attrib) {
662 Py_DECREF(tag);
663 return NULL;
664 }
665 } else {
666 Py_INCREF(Py_None);
667 attrib = Py_None;
668 }
669
670 element = (ElementObject*) element_new(tag, attrib);
671
672 Py_DECREF(tag);
673 Py_DECREF(attrib);
674
675 if (!element)
676 return NULL;
677
678 text = deepcopy(JOIN_OBJ(self->text), memo);
679 if (!text)
680 goto error;
681 Py_DECREF(element->text);
682 element->text = JOIN_SET(text, JOIN_GET(self->text));
683
684 tail = deepcopy(JOIN_OBJ(self->tail), memo);
685 if (!tail)
686 goto error;
687 Py_DECREF(element->tail);
688 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
689
690 if (self->extra) {
691
692 if (element_resize(element, self->extra->length) < 0)
693 goto error;
694
695 for (i = 0; i < self->extra->length; i++) {
696 PyObject* child = deepcopy(self->extra->children[i], memo);
697 if (!child) {
698 element->extra->length = i;
699 goto error;
700 }
701 element->extra->children[i] = child;
702 }
703
704 element->extra->length = self->extra->length;
705
706 }
707
708 /* add object to memo dictionary (so deepcopy won't visit it again) */
709 id = PyInt_FromLong((Py_uintptr_t) self);
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000710 if (!id)
711 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000712
713 i = PyDict_SetItem(memo, id, (PyObject*) element);
714
715 Py_DECREF(id);
716
717 if (i < 0)
718 goto error;
719
720 return (PyObject*) element;
721
722 error:
723 Py_DECREF(element);
724 return NULL;
725}
726
727LOCAL(int)
728checkpath(PyObject* tag)
729{
Neal Norwitzc7074382006-06-12 02:06:17 +0000730 Py_ssize_t i;
731 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000732
733 /* check if a tag contains an xpath character */
734
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000735#define PATHCHAR(ch) \
736 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000737
738#if defined(Py_USING_UNICODE)
739 if (PyUnicode_Check(tag)) {
740 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
741 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
742 if (p[i] == '{')
743 check = 0;
744 else if (p[i] == '}')
745 check = 1;
746 else if (check && PATHCHAR(p[i]))
747 return 1;
748 }
749 return 0;
750 }
751#endif
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000752 if (PyString_Check(tag)) {
753 char *p = PyString_AS_STRING(tag);
754 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755 if (p[i] == '{')
756 check = 0;
757 else if (p[i] == '}')
758 check = 1;
759 else if (check && PATHCHAR(p[i]))
760 return 1;
761 }
762 return 0;
763 }
764
765 return 1; /* unknown type; might be path expression */
766}
767
768static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000769element_extend(ElementObject* self, PyObject* args)
770{
771 PyObject* seq;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300772 Py_ssize_t i;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000773
774 PyObject* seq_in;
775 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
776 return NULL;
777
778 seq = PySequence_Fast(seq_in, "");
779 if (!seq) {
780 PyErr_Format(
781 PyExc_TypeError,
782 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
783 );
784 return NULL;
785 }
786
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300787 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000788 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
789 if (element_add_subelement(self, element) < 0) {
790 Py_DECREF(seq);
791 return NULL;
792 }
793 }
794
795 Py_DECREF(seq);
796
797 Py_RETURN_NONE;
798}
799
800static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000801element_find(ElementObject* self, PyObject* args)
802{
803 int i;
804
805 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000806 PyObject* namespaces = Py_None;
807 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000808 return NULL;
809
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000810 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000811 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000812 elementpath_obj, "find", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000813 );
814
815 if (!self->extra)
816 Py_RETURN_NONE;
817
818 for (i = 0; i < self->extra->length; i++) {
819 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300820 int rc;
821 if (!Element_CheckExact(item))
822 continue;
823 Py_INCREF(item);
824 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
825 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826 return item;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300827 Py_DECREF(item);
828 if (rc < 0 && PyErr_Occurred())
829 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000830 }
831
832 Py_RETURN_NONE;
833}
834
835static PyObject*
836element_findtext(ElementObject* self, PyObject* args)
837{
838 int i;
839
840 PyObject* tag;
841 PyObject* default_value = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000842 PyObject* namespaces = Py_None;
843 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000844 return NULL;
845
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000846 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000847 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000848 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000849 );
850
851 if (!self->extra) {
852 Py_INCREF(default_value);
853 return default_value;
854 }
855
856 for (i = 0; i < self->extra->length; i++) {
857 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300858 int rc;
859 if (!Element_CheckExact(item))
860 continue;
861 Py_INCREF(item);
862 rc = PyObject_Compare(item->tag, tag);
863 if (rc == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000864 PyObject* text = element_get_text(item);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300865 if (text == Py_None) {
866 Py_DECREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000867 return PyString_FromString("");
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300868 }
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000869 Py_XINCREF(text);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300870 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000871 return text;
872 }
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300873 Py_DECREF(item);
874 if (rc < 0 && PyErr_Occurred())
875 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000876 }
877
878 Py_INCREF(default_value);
879 return default_value;
880}
881
882static PyObject*
883element_findall(ElementObject* self, PyObject* args)
884{
885 int i;
886 PyObject* out;
887
888 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000889 PyObject* namespaces = Py_None;
890 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000891 return NULL;
892
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000893 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000894 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000895 elementpath_obj, "findall", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000896 );
897
898 out = PyList_New(0);
899 if (!out)
900 return NULL;
901
902 if (!self->extra)
903 return out;
904
905 for (i = 0; i < self->extra->length; i++) {
906 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300907 int rc;
908 if (!Element_CheckExact(item))
909 continue;
910 Py_INCREF(item);
911 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
912 if (rc == 0)
913 rc = PyList_Append(out, item);
914 Py_DECREF(item);
915 if (rc < 0 && PyErr_Occurred()) {
916 Py_DECREF(out);
917 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000918 }
919 }
920
921 return out;
922}
923
924static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000925element_iterfind(ElementObject* self, PyObject* args)
926{
927 PyObject* tag;
928 PyObject* namespaces = Py_None;
929 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
930 return NULL;
931
932 return PyObject_CallMethod(
933 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
934 );
935}
936
937static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000938element_get(ElementObject* self, PyObject* args)
939{
940 PyObject* value;
941
942 PyObject* key;
943 PyObject* default_value = Py_None;
944 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
945 return NULL;
946
947 if (!self->extra || self->extra->attrib == Py_None)
948 value = default_value;
949 else {
950 value = PyDict_GetItem(self->extra->attrib, key);
951 if (!value)
952 value = default_value;
953 }
954
955 Py_INCREF(value);
956 return value;
957}
958
959static PyObject*
960element_getchildren(ElementObject* self, PyObject* args)
961{
962 int i;
963 PyObject* list;
964
Serhiy Storchaka09b52472017-05-17 10:08:11 +0300965 if (PyErr_WarnPy3k("This method will be removed in future versions. "
966 "Use 'list(elem)' or iteration over elem instead.",
967 1) < 0) {
968 return NULL;
969 }
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000970
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000971 if (!PyArg_ParseTuple(args, ":getchildren"))
972 return NULL;
973
974 if (!self->extra)
975 return PyList_New(0);
976
977 list = PyList_New(self->extra->length);
978 if (!list)
979 return NULL;
980
981 for (i = 0; i < self->extra->length; i++) {
982 PyObject* item = self->extra->children[i];
983 Py_INCREF(item);
984 PyList_SET_ITEM(list, i, item);
985 }
986
987 return list;
988}
989
990static PyObject*
Serhiy Storchaka09b52472017-05-17 10:08:11 +0300991element_iter_impl(ElementObject* self, PyObject* tag)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000992{
Serhiy Storchaka09b52472017-05-17 10:08:11 +0300993 PyObject* args;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000994 PyObject* result;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000995
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000996 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000997 PyErr_SetString(
998 PyExc_RuntimeError,
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000999 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001000 );
1001 return NULL;
1002 }
1003
1004 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001005 if (!args)
1006 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001007
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001008 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1009 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1010
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001011 result = PyObject_CallObject(elementtree_iter_obj, args);
1012
1013 Py_DECREF(args);
1014
1015 return result;
1016}
1017
Serhiy Storchaka09b52472017-05-17 10:08:11 +03001018static PyObject*
1019element_iter(ElementObject* self, PyObject* args)
1020{
1021 PyObject* tag = Py_None;
1022 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
1023 return NULL;
1024
1025 return element_iter_impl(self, tag);
1026}
1027
1028static PyObject*
1029element_getiterator(ElementObject* self, PyObject* args)
1030{
1031 PyObject* tag = Py_None;
1032 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
1033 return NULL;
1034
1035 /* Change for a DeprecationWarning in 1.4 */
1036 if (Py_Py3kWarningFlag &&
1037 PyErr_WarnEx(PyExc_PendingDeprecationWarning,
1038 "This method will be removed in future versions. "
1039 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1040 1) < 0) {
1041 return NULL;
1042 }
1043 return element_iter_impl(self, tag);
1044}
1045
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001046
1047static PyObject*
1048element_itertext(ElementObject* self, PyObject* args)
1049{
1050 PyObject* result;
1051
1052 if (!PyArg_ParseTuple(args, ":itertext"))
1053 return NULL;
1054
1055 if (!elementtree_itertext_obj) {
1056 PyErr_SetString(
1057 PyExc_RuntimeError,
1058 "itertext helper not found"
1059 );
1060 return NULL;
1061 }
1062
1063 args = PyTuple_New(1);
1064 if (!args)
1065 return NULL;
1066
1067 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1068
1069 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001070
1071 Py_DECREF(args);
1072
1073 return result;
1074}
1075
1076static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001077element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001078{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001079 ElementObject* self = (ElementObject*) self_;
1080
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081 if (!self->extra || index < 0 || index >= self->extra->length) {
1082 PyErr_SetString(
1083 PyExc_IndexError,
1084 "child index out of range"
1085 );
1086 return NULL;
1087 }
1088
1089 Py_INCREF(self->extra->children[index]);
1090 return self->extra->children[index];
1091}
1092
1093static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001094element_insert(ElementObject* self, PyObject* args)
1095{
1096 int i;
1097
1098 int index;
1099 PyObject* element;
1100 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1101 &Element_Type, &element))
1102 return NULL;
1103
1104 if (!self->extra)
1105 element_new_extra(self, NULL);
1106
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001107 if (index < 0) {
1108 index += self->extra->length;
1109 if (index < 0)
1110 index = 0;
1111 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001112 if (index > self->extra->length)
1113 index = self->extra->length;
1114
1115 if (element_resize(self, 1) < 0)
1116 return NULL;
1117
1118 for (i = self->extra->length; i > index; i--)
1119 self->extra->children[i] = self->extra->children[i-1];
1120
1121 Py_INCREF(element);
1122 self->extra->children[index] = element;
1123
1124 self->extra->length++;
1125
1126 Py_RETURN_NONE;
1127}
1128
1129static PyObject*
1130element_items(ElementObject* self, PyObject* args)
1131{
1132 if (!PyArg_ParseTuple(args, ":items"))
1133 return NULL;
1134
1135 if (!self->extra || self->extra->attrib == Py_None)
1136 return PyList_New(0);
1137
1138 return PyDict_Items(self->extra->attrib);
1139}
1140
1141static PyObject*
1142element_keys(ElementObject* self, PyObject* args)
1143{
1144 if (!PyArg_ParseTuple(args, ":keys"))
1145 return NULL;
1146
1147 if (!self->extra || self->extra->attrib == Py_None)
1148 return PyList_New(0);
1149
1150 return PyDict_Keys(self->extra->attrib);
1151}
1152
Martin v. Löwis18e16552006-02-15 17:27:45 +00001153static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001154element_length(ElementObject* self)
1155{
1156 if (!self->extra)
1157 return 0;
1158
1159 return self->extra->length;
1160}
1161
1162static PyObject*
1163element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1164{
1165 PyObject* elem;
1166
1167 PyObject* tag;
1168 PyObject* attrib;
1169 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1170 return NULL;
1171
1172 attrib = PyDict_Copy(attrib);
1173 if (!attrib)
1174 return NULL;
1175
1176 elem = element_new(tag, attrib);
1177
1178 Py_DECREF(attrib);
1179
1180 return elem;
1181}
1182
1183static PyObject*
1184element_reduce(ElementObject* self, PyObject* args)
1185{
1186 if (!PyArg_ParseTuple(args, ":__reduce__"))
1187 return NULL;
1188
1189 /* Hack alert: This method is used to work around a __copy__
1190 problem on certain 2.3 and 2.4 versions. To save time and
1191 simplify the code, we create the copy in here, and use a dummy
1192 copyelement helper to trick the copy module into doing the
1193 right thing. */
1194
1195 if (!elementtree_copyelement_obj) {
1196 PyErr_SetString(
1197 PyExc_RuntimeError,
1198 "copyelement helper not found"
1199 );
1200 return NULL;
1201 }
1202
1203 return Py_BuildValue(
1204 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1205 );
1206}
1207
1208static PyObject*
1209element_remove(ElementObject* self, PyObject* args)
1210{
1211 int i;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001212 int rc;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001213 PyObject* element;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001214 PyObject* found;
1215
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001216 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1217 return NULL;
1218
1219 if (!self->extra) {
1220 /* element has no children, so raise exception */
1221 PyErr_SetString(
1222 PyExc_ValueError,
1223 "list.remove(x): x not in list"
1224 );
1225 return NULL;
1226 }
1227
1228 for (i = 0; i < self->extra->length; i++) {
1229 if (self->extra->children[i] == element)
1230 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001231 rc = PyObject_Compare(self->extra->children[i], element);
1232 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001233 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001234 if (rc < 0 && PyErr_Occurred())
1235 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001236 }
1237
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001238 if (i >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001239 /* element is not in children, so raise exception */
1240 PyErr_SetString(
1241 PyExc_ValueError,
1242 "list.remove(x): x not in list"
1243 );
1244 return NULL;
1245 }
1246
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001247 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001248
1249 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001250 for (; i < self->extra->length; i++)
1251 self->extra->children[i] = self->extra->children[i+1];
1252
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001253 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254 Py_RETURN_NONE;
1255}
1256
1257static PyObject*
1258element_repr(ElementObject* self)
1259{
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001260 int status;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001261
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001262 if (self->tag == NULL)
1263 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001264
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001265 status = Py_ReprEnter((PyObject *)self);
1266 if (status == 0) {
1267 PyObject *repr, *tag;
1268 tag = PyObject_Repr(self->tag);
Stéphane Wirtel41af9422017-06-12 15:30:48 +02001269 if (!tag) {
1270 Py_ReprLeave((PyObject *)self);
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001271 return NULL;
Stéphane Wirtel41af9422017-06-12 15:30:48 +02001272 }
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001273
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001274 repr = PyString_FromFormat("<Element %s at %p>",
1275 PyString_AS_STRING(tag), self);
Benjamin Petersond7324bc2016-12-03 11:30:04 -08001276 Py_ReprLeave((PyObject *)self);
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001277 Py_DECREF(tag);
1278 return repr;
1279 }
1280 if (status > 0)
1281 PyErr_Format(PyExc_RuntimeError,
1282 "reentrant call inside %s.__repr__",
1283 Py_TYPE(self)->tp_name);
1284 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001285}
1286
1287static PyObject*
1288element_set(ElementObject* self, PyObject* args)
1289{
1290 PyObject* attrib;
1291
1292 PyObject* key;
1293 PyObject* value;
1294 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1295 return NULL;
1296
1297 if (!self->extra)
1298 element_new_extra(self, NULL);
1299
1300 attrib = element_get_attrib(self);
1301 if (!attrib)
1302 return NULL;
1303
1304 if (PyDict_SetItem(attrib, key, value) < 0)
1305 return NULL;
1306
1307 Py_RETURN_NONE;
1308}
1309
1310static int
Serhiy Storchakab5b76c32015-11-26 11:21:47 +02001311element_setitem(PyObject* self_, Py_ssize_t index_, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001312{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001313 ElementObject* self = (ElementObject*) self_;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001314 int i, index;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001315 PyObject* old;
1316
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001317 if (!self->extra || index_ < 0 || index_ >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001318 PyErr_SetString(
1319 PyExc_IndexError,
1320 "child assignment index out of range");
1321 return -1;
1322 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001323 index = (int)index_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001324
1325 old = self->extra->children[index];
1326
1327 if (item) {
1328 Py_INCREF(item);
1329 self->extra->children[index] = item;
1330 } else {
1331 self->extra->length--;
1332 for (i = index; i < self->extra->length; i++)
1333 self->extra->children[i] = self->extra->children[i+1];
1334 }
1335
1336 Py_DECREF(old);
1337
1338 return 0;
1339}
1340
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001341static PyObject*
1342element_subscr(PyObject* self_, PyObject* item)
1343{
1344 ElementObject* self = (ElementObject*) self_;
1345
1346#if (PY_VERSION_HEX < 0x02050000)
1347 if (PyInt_Check(item) || PyLong_Check(item)) {
1348 long i = PyInt_AsLong(item);
1349#else
1350 if (PyIndex_Check(item)) {
1351 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1352#endif
1353
1354 if (i == -1 && PyErr_Occurred()) {
1355 return NULL;
1356 }
1357 if (i < 0 && self->extra)
1358 i += self->extra->length;
1359 return element_getitem(self_, i);
1360 }
1361 else if (PySlice_Check(item)) {
1362 Py_ssize_t start, stop, step, slicelen, cur, i;
1363 PyObject* list;
1364
1365 if (!self->extra)
1366 return PyList_New(0);
1367
Serhiy Storchaka5e793212017-04-15 20:11:12 +03001368 if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001369 return NULL;
1370 }
Serhiy Storchakae41390a2017-04-08 11:48:57 +03001371 slicelen = _PySlice_AdjustIndices(self->extra->length, &start, &stop,
1372 step);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001373
1374 if (slicelen <= 0)
1375 return PyList_New(0);
1376 else {
1377 list = PyList_New(slicelen);
1378 if (!list)
1379 return NULL;
1380
1381 for (cur = start, i = 0; i < slicelen;
1382 cur += step, i++) {
1383 PyObject* item = self->extra->children[cur];
1384 Py_INCREF(item);
1385 PyList_SET_ITEM(list, i, item);
1386 }
1387
1388 return list;
1389 }
1390 }
1391 else {
1392 PyErr_SetString(PyExc_TypeError,
1393 "element indices must be integers");
1394 return NULL;
1395 }
1396}
1397
1398static int
1399element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1400{
1401 ElementObject* self = (ElementObject*) self_;
1402
1403#if (PY_VERSION_HEX < 0x02050000)
1404 if (PyInt_Check(item) || PyLong_Check(item)) {
1405 long i = PyInt_AsLong(item);
1406#else
1407 if (PyIndex_Check(item)) {
1408 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1409#endif
1410
1411 if (i == -1 && PyErr_Occurred()) {
1412 return -1;
1413 }
1414 if (i < 0 && self->extra)
1415 i += self->extra->length;
1416 return element_setitem(self_, i, value);
1417 }
1418 else if (PySlice_Check(item)) {
1419 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1420
1421 PyObject* recycle = NULL;
1422 PyObject* seq = NULL;
1423
1424 if (!self->extra)
1425 element_new_extra(self, NULL);
1426
Serhiy Storchaka5e793212017-04-15 20:11:12 +03001427 if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001428 return -1;
1429 }
Serhiy Storchakae41390a2017-04-08 11:48:57 +03001430 slicelen = _PySlice_AdjustIndices(self->extra->length, &start, &stop,
1431 step);
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001432 assert(slicelen <= self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001433
1434 if (value == NULL)
1435 newlen = 0;
1436 else {
1437 seq = PySequence_Fast(value, "");
1438 if (!seq) {
1439 PyErr_Format(
1440 PyExc_TypeError,
1441 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1442 );
1443 return -1;
1444 }
1445 newlen = PySequence_Size(seq);
1446 }
1447
1448 if (step != 1 && newlen != slicelen)
1449 {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001450 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001451 PyErr_Format(PyExc_ValueError,
1452#if (PY_VERSION_HEX < 0x02050000)
1453 "attempt to assign sequence of size %d "
1454 "to extended slice of size %d",
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001455 (int)newlen, (int)slicelen
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001456#else
1457 "attempt to assign sequence of size %zd "
1458 "to extended slice of size %zd",
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001459 newlen, slicelen
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001460#endif
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001461 );
1462 return -1;
1463 }
1464
1465
1466 /* Resize before creating the recycle bin, to prevent refleaks. */
1467 if (newlen > slicelen) {
1468 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001469 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001470 return -1;
1471 }
1472 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001473 assert(newlen - slicelen <= INT_MAX - self->extra->length);
1474 assert(newlen - slicelen >= -self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001475
1476 if (slicelen > 0) {
1477 /* to avoid recursive calls to this method (via decref), move
1478 old items to the recycle bin here, and get rid of them when
1479 we're done modifying the element */
1480 recycle = PyList_New(slicelen);
1481 if (!recycle) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001482 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001483 return -1;
1484 }
1485 for (cur = start, i = 0; i < slicelen;
1486 cur += step, i++)
1487 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1488 }
1489
1490 if (newlen < slicelen) {
1491 /* delete slice */
1492 for (i = stop; i < self->extra->length; i++)
1493 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1494 } else if (newlen > slicelen) {
1495 /* insert slice */
1496 for (i = self->extra->length-1; i >= stop; i--)
1497 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1498 }
1499
1500 /* replace the slice */
1501 for (cur = start, i = 0; i < newlen;
1502 cur += step, i++) {
1503 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1504 Py_INCREF(element);
1505 self->extra->children[cur] = element;
1506 }
1507
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001508 self->extra->length += (int)(newlen - slicelen);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001509
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001510 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001511
1512 /* discard the recycle bin, and everything in it */
1513 Py_XDECREF(recycle);
1514
1515 return 0;
1516 }
1517 else {
1518 PyErr_SetString(PyExc_TypeError,
1519 "element indices must be integers");
1520 return -1;
1521 }
1522}
1523
/* Method table for Element objects; element_getattr looks names up
   here through Py_FindMethod.  Every entry uses the METH_VARARGS
   calling convention and the table ends with a NULL sentinel. */
static PyMethodDef element_methods[] = {

    {"clear", (PyCFunction) element_clear, METH_VARARGS},

    /* attribute access */
    {"get", (PyCFunction) element_get, METH_VARARGS},
    {"set", (PyCFunction) element_set, METH_VARARGS},

    /* searching */
    {"find", (PyCFunction) element_find, METH_VARARGS},
    {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
    {"findall", (PyCFunction) element_findall, METH_VARARGS},

    /* child manipulation */
    {"append", (PyCFunction) element_append, METH_VARARGS},
    {"extend", (PyCFunction) element_extend, METH_VARARGS},
    {"insert", (PyCFunction) element_insert, METH_VARARGS},
    {"remove", (PyCFunction) element_remove, METH_VARARGS},

    /* iteration */
    {"iter", (PyCFunction) element_iter, METH_VARARGS},
    {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
    {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},

    /* methods that emit a removal/deprecation warning when called */
    {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
    {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},

    {"items", (PyCFunction) element_items, METH_VARARGS},
    {"keys", (PyCFunction) element_keys, METH_VARARGS},

    {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},

    {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
    {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},

    /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
       C objects correctly, so we have to fake it using a __reduce__-
       based hack (see the element_reduce implementation above for
       details). */

    /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
       using a runtime test to figure out if we need to fake things
       or not (see the init code below).  The following entry is
       enabled only if the hack is needed. */

    {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},

    {NULL, NULL}
};
1569
1570static PyObject*
1571element_getattr(ElementObject* self, char* name)
1572{
1573 PyObject* res;
1574
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001575 /* handle common attributes first */
1576 if (strcmp(name, "tag") == 0) {
1577 res = self->tag;
1578 Py_INCREF(res);
1579 return res;
1580 } else if (strcmp(name, "text") == 0) {
1581 res = element_get_text(self);
Xiang Zhang827c7832017-03-22 12:25:51 +08001582 Py_XINCREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001583 return res;
1584 }
1585
1586 /* methods */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001587 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1588 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001589 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001590
1591 PyErr_Clear();
1592
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001593 /* less common attributes */
1594 if (strcmp(name, "tail") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001595 res = element_get_tail(self);
1596 } else if (strcmp(name, "attrib") == 0) {
1597 if (!self->extra)
1598 element_new_extra(self, NULL);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001599 res = element_get_attrib(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001600 } else {
1601 PyErr_SetString(PyExc_AttributeError, name);
1602 return NULL;
1603 }
1604
1605 if (!res)
1606 return NULL;
1607
1608 Py_INCREF(res);
1609 return res;
1610}
1611
1612static int
1613element_setattr(ElementObject* self, const char* name, PyObject* value)
1614{
1615 if (value == NULL) {
1616 PyErr_SetString(
1617 PyExc_AttributeError,
1618 "can't delete element attributes"
1619 );
1620 return -1;
1621 }
1622
1623 if (strcmp(name, "tag") == 0) {
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001624 Py_INCREF(value);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001625 Py_SETREF(self->tag, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001626 } else if (strcmp(name, "text") == 0) {
1627 Py_DECREF(JOIN_OBJ(self->text));
1628 self->text = value;
1629 Py_INCREF(self->text);
1630 } else if (strcmp(name, "tail") == 0) {
1631 Py_DECREF(JOIN_OBJ(self->tail));
1632 self->tail = value;
1633 Py_INCREF(self->tail);
1634 } else if (strcmp(name, "attrib") == 0) {
1635 if (!self->extra)
1636 element_new_extra(self, NULL);
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001637 Py_INCREF(value);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001638 Py_SETREF(self->extra->attrib, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001639 } else {
1640 PyErr_SetString(PyExc_AttributeError, name);
1641 return -1;
1642 }
1643
1644 return 0;
1645}
1646
/* Sequence protocol slots for Element: length, integer indexing and
   integer item assignment/deletion.  Slice handling is provided by the
   mapping protocol table instead, so the slice slots stay empty. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem, /* sq_item */
    0, /* sq_slice */
    element_setitem, /* sq_ass_item */
    0, /* sq_ass_slice */
};
1656
/* Mapping protocol slots for Element; the subscript handlers accept
   both integer indices and slice objects. */
static PyMappingMethods element_as_mapping = {
    (lenfunc) element_length, /* mp_length */
    (binaryfunc) element_subscr, /* mp_subscript */
    (objobjargproc) element_ass_subscr, /* mp_ass_subscript */
};
1662
/* Type object for Element.  Attribute access goes through the
   name-based tp_getattr/tp_setattr handlers, and children are exposed
   through both the sequence and the mapping protocol tables.  Slots
   after tp_as_mapping are left zero-initialized. */
statichere PyTypeObject Element_Type = {
    PyObject_HEAD_INIT(NULL)
    /* ob_size, tp_name, tp_basicsize, tp_itemsize */
    0, "Element", sizeof(ElementObject), 0,
    /* methods */
    (destructor)element_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)element_getattr, /* tp_getattr */
    (setattrfunc)element_setattr, /* tp_setattr */
    0, /* tp_compare */
    (reprfunc)element_repr, /* tp_repr */
    0, /* tp_as_number */
    &element_as_sequence, /* tp_as_sequence */
    &element_as_mapping, /* tp_as_mapping */
};
1677
1678/* ==================================================================== */
1679/* the tree builder type */
1680
/* State of a tree builder: the tree under construction, the position
   within it, pending character data, and optional event collection. */
typedef struct {
    PyObject_HEAD

    PyObject* root; /* root node (first created node) */

    /* both start out as Py_None, until the first start() call */
    ElementObject* this; /* current node */
    ElementObject* last; /* most recently created node */

    PyObject* data; /* data collector (string or list), or NULL */

    PyObject* stack; /* element stack */
    Py_ssize_t index; /* current stack size (0=empty) */

    /* element tracing */
    PyObject* events; /* list of events, or NULL if not collecting */
    PyObject* start_event_obj; /* event objects (NULL to ignore) */
    PyObject* end_event_obj;
    PyObject* start_ns_event_obj;
    PyObject* end_ns_event_obj;

} TreeBuilderObject;

staticforward PyTypeObject TreeBuilder_Type;

/* true only for exact TreeBuilder instances (no subclass check) */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001706
1707/* -------------------------------------------------------------------- */
1708/* constructor and destructor */
1709
1710LOCAL(PyObject*)
1711treebuilder_new(void)
1712{
1713 TreeBuilderObject* self;
1714
1715 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1716 if (self == NULL)
1717 return NULL;
1718
1719 self->root = NULL;
1720
1721 Py_INCREF(Py_None);
1722 self->this = (ElementObject*) Py_None;
1723
1724 Py_INCREF(Py_None);
1725 self->last = (ElementObject*) Py_None;
1726
1727 self->data = NULL;
1728
1729 self->stack = PyList_New(20);
1730 self->index = 0;
1731
1732 self->events = NULL;
1733 self->start_event_obj = self->end_event_obj = NULL;
1734 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1735
1736 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1737
1738 return (PyObject*) self;
1739}
1740
1741static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00001742treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001743{
1744 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1745 return NULL;
1746
1747 return treebuilder_new();
1748}
1749
1750static void
1751treebuilder_dealloc(TreeBuilderObject* self)
1752{
1753 Py_XDECREF(self->end_ns_event_obj);
1754 Py_XDECREF(self->start_ns_event_obj);
1755 Py_XDECREF(self->end_event_obj);
1756 Py_XDECREF(self->start_event_obj);
1757 Py_XDECREF(self->events);
1758 Py_DECREF(self->stack);
1759 Py_XDECREF(self->data);
1760 Py_DECREF(self->last);
1761 Py_DECREF(self->this);
1762 Py_XDECREF(self->root);
1763
1764 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1765
1766 PyObject_Del(self);
1767}
1768
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +03001769/* -------------------------------------------------------------------- */
1770/* helpers for handling of arbitrary element-like objects */
1771
1772static void
1773treebuilder_set_element_text_or_tail(PyObject **data, PyObject **dest)
1774{
1775 PyObject *tmp = JOIN_OBJ(*dest);
1776 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
1777 *data = NULL;
1778 Py_DECREF(tmp);
1779}
1780
1781LOCAL(void)
1782treebuilder_flush_data(TreeBuilderObject* self)
1783{
1784 ElementObject *element = self->last;
1785
1786 if (self->data) {
1787 if (self->this == element) {
1788 treebuilder_set_element_text_or_tail(
1789 &self->data,
1790 &element->text);
1791 }
1792 else {
1793 treebuilder_set_element_text_or_tail(
1794 &self->data,
1795 &element->tail);
1796 }
1797 }
1798}
1799
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001800LOCAL(int)
1801treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
1802 PyObject *node)
1803{
1804 if (action != NULL) {
1805 PyObject *res = PyTuple_Pack(2, action, node);
1806 if (res == NULL)
1807 return -1;
1808 if (PyList_Append(self->events, res) < 0) {
1809 Py_DECREF(res);
1810 return -1;
1811 }
1812 Py_DECREF(res);
1813 }
1814 return 0;
1815}
1816
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001817/* -------------------------------------------------------------------- */
1818/* handlers */
1819
1820LOCAL(PyObject*)
1821treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1822 PyObject* standalone)
1823{
1824 Py_RETURN_NONE;
1825}
1826
1827LOCAL(PyObject*)
1828treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1829 PyObject* attrib)
1830{
1831 PyObject* node;
1832 PyObject* this;
1833
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +03001834 treebuilder_flush_data(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001835
1836 node = element_new(tag, attrib);
1837 if (!node)
1838 return NULL;
1839
1840 this = (PyObject*) self->this;
1841
1842 if (this != Py_None) {
1843 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001844 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001845 } else {
1846 if (self->root) {
1847 PyErr_SetString(
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001848 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001849 "multiple elements on top level"
1850 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001851 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001852 }
1853 Py_INCREF(node);
1854 self->root = node;
1855 }
1856
1857 if (self->index < PyList_GET_SIZE(self->stack)) {
1858 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001859 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001860 Py_INCREF(this);
1861 } else {
1862 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001863 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001864 }
1865 self->index++;
1866
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001867 Py_INCREF(node);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001868 Py_SETREF(self->this, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001869
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001870 Py_INCREF(node);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001871 Py_SETREF(self->last, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001872
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001873 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
1874 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001875
1876 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001877
1878 error:
1879 Py_DECREF(node);
1880 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001881}
1882
1883LOCAL(PyObject*)
1884treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1885{
1886 if (!self->data) {
Fredrik Lundhdc075b92006-08-16 16:47:07 +00001887 if (self->last == (ElementObject*) Py_None) {
1888 /* ignore calls to data before the first call to start */
1889 Py_RETURN_NONE;
1890 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001891 /* store the first item as is */
1892 Py_INCREF(data); self->data = data;
1893 } else {
1894 /* more than one item; use a list to collect items */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001895 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1896 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001897 /* expat often generates single character data sections; handle
1898 the most common case by resizing the existing string... */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001899 Py_ssize_t size = PyString_GET_SIZE(self->data);
1900 if (_PyString_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001901 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001902 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001903 } else if (PyList_CheckExact(self->data)) {
1904 if (PyList_Append(self->data, data) < 0)
1905 return NULL;
1906 } else {
1907 PyObject* list = PyList_New(2);
1908 if (!list)
1909 return NULL;
1910 PyList_SET_ITEM(list, 0, self->data);
1911 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1912 self->data = list;
1913 }
1914 }
1915
1916 Py_RETURN_NONE;
1917}
1918
1919LOCAL(PyObject*)
1920treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1921{
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001922 ElementObject *item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001923
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +03001924 treebuilder_flush_data(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001925
1926 if (self->index == 0) {
1927 PyErr_SetString(
1928 PyExc_IndexError,
1929 "pop from empty stack"
1930 );
1931 return NULL;
1932 }
1933
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001934 item = self->last;
1935 self->last = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001936 self->index--;
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001937 self->this = (ElementObject *) PyList_GET_ITEM(self->stack, self->index);
1938 Py_INCREF(self->this);
1939 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001940
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001941 if (treebuilder_append_event(self, self->end_event_obj, (PyObject*)self->last) < 0)
1942 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001943
1944 Py_INCREF(self->last);
1945 return (PyObject*) self->last;
1946}
1947
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001948/* -------------------------------------------------------------------- */
1949/* methods (in alphabetical order) */
1950
1951static PyObject*
1952treebuilder_data(TreeBuilderObject* self, PyObject* args)
1953{
1954 PyObject* data;
1955 if (!PyArg_ParseTuple(args, "O:data", &data))
1956 return NULL;
1957
1958 return treebuilder_handle_data(self, data);
1959}
1960
1961static PyObject*
1962treebuilder_end(TreeBuilderObject* self, PyObject* args)
1963{
1964 PyObject* tag;
1965 if (!PyArg_ParseTuple(args, "O:end", &tag))
1966 return NULL;
1967
1968 return treebuilder_handle_end(self, tag);
1969}
1970
1971LOCAL(PyObject*)
1972treebuilder_done(TreeBuilderObject* self)
1973{
1974 PyObject* res;
1975
1976 /* FIXME: check stack size? */
1977
1978 if (self->root)
1979 res = self->root;
1980 else
1981 res = Py_None;
1982
1983 Py_INCREF(res);
1984 return res;
1985}
1986
1987static PyObject*
1988treebuilder_close(TreeBuilderObject* self, PyObject* args)
1989{
1990 if (!PyArg_ParseTuple(args, ":close"))
1991 return NULL;
1992
1993 return treebuilder_done(self);
1994}
1995
1996static PyObject*
1997treebuilder_start(TreeBuilderObject* self, PyObject* args)
1998{
1999 PyObject* tag;
2000 PyObject* attrib = Py_None;
2001 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2002 return NULL;
2003
2004 return treebuilder_handle_start(self, tag, attrib);
2005}
2006
2007static PyObject*
2008treebuilder_xml(TreeBuilderObject* self, PyObject* args)
2009{
2010 PyObject* encoding;
2011 PyObject* standalone;
2012 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
2013 return NULL;
2014
2015 return treebuilder_handle_xml(self, encoding, standalone);
2016}
2017
2018static PyMethodDef treebuilder_methods[] = {
2019 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2020 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2021 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
2022 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
2023 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2024 {NULL, NULL}
2025};
2026
2027static PyObject*
2028treebuilder_getattr(TreeBuilderObject* self, char* name)
2029{
2030 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
2031}
2032
2033statichere PyTypeObject TreeBuilder_Type = {
2034 PyObject_HEAD_INIT(NULL)
2035 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
2036 /* methods */
2037 (destructor)treebuilder_dealloc, /* tp_dealloc */
2038 0, /* tp_print */
2039 (getattrfunc)treebuilder_getattr, /* tp_getattr */
2040};
2041
2042/* ==================================================================== */
2043/* the expat interface */
2044
2045#if defined(USE_EXPAT)
2046
2047#include "expat.h"
2048
2049#if defined(USE_PYEXPAT_CAPI)
2050#include "pyexpat.h"
2051static struct PyExpat_CAPI* expat_capi;
2052#define EXPAT(func) (expat_capi->func)
2053#else
2054#define EXPAT(func) (XML_##func)
2055#endif
2056
2057typedef struct {
2058 PyObject_HEAD
2059
2060 XML_Parser parser;
2061
2062 PyObject* target;
2063 PyObject* entity;
2064
2065 PyObject* names;
2066
2067 PyObject* handle_xml;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002068
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002069 PyObject* handle_start;
2070 PyObject* handle_data;
2071 PyObject* handle_end;
2072
2073 PyObject* handle_comment;
2074 PyObject* handle_pi;
2075
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002076 PyObject* handle_close;
2077
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002078} XMLParserObject;
2079
2080staticforward PyTypeObject XMLParser_Type;
2081
2082/* helpers */
2083
2084#if defined(Py_USING_UNICODE)
2085LOCAL(int)
2086checkstring(const char* string, int size)
2087{
2088 int i;
2089
2090 /* check if an 8-bit string contains UTF-8 characters */
2091 for (i = 0; i < size; i++)
2092 if (string[i] & 0x80)
2093 return 1;
2094
2095 return 0;
2096}
2097#endif
2098
2099LOCAL(PyObject*)
2100makestring(const char* string, int size)
2101{
2102 /* convert a UTF-8 string to either a 7-bit ascii string or a
2103 Unicode string */
2104
2105#if defined(Py_USING_UNICODE)
2106 if (checkstring(string, size))
2107 return PyUnicode_DecodeUTF8(string, size, "strict");
2108#endif
2109
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002110 return PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002111}
2112
2113LOCAL(PyObject*)
2114makeuniversal(XMLParserObject* self, const char* string)
2115{
2116 /* convert a UTF-8 tag/attribute name from the expat parser
2117 to a universal name string */
2118
2119 int size = strlen(string);
2120 PyObject* key;
2121 PyObject* value;
2122
2123 /* look the 'raw' name up in the names dictionary */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002124 key = PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002125 if (!key)
2126 return NULL;
2127
2128 value = PyDict_GetItem(self->names, key);
2129
2130 if (value) {
2131 Py_INCREF(value);
2132 } else {
2133 /* new name. convert to universal name, and decode as
2134 necessary */
2135
2136 PyObject* tag;
2137 char* p;
2138 int i;
2139
2140 /* look for namespace separator */
2141 for (i = 0; i < size; i++)
2142 if (string[i] == '}')
2143 break;
2144 if (i != size) {
2145 /* convert to universal name */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002146 tag = PyString_FromStringAndSize(NULL, size+1);
2147 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002148 p[0] = '{';
2149 memcpy(p+1, string, size);
2150 size++;
2151 } else {
2152 /* plain name; use key as tag */
2153 Py_INCREF(key);
2154 tag = key;
2155 }
2156
2157 /* decode universal name */
2158#if defined(Py_USING_UNICODE)
2159 /* inline makestring, to avoid duplicating the source string if
Martin Panter6a8163a2016-04-15 02:14:19 +00002160 it's not a utf-8 string */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002161 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002162 if (checkstring(p, size)) {
2163 value = PyUnicode_DecodeUTF8(p, size, "strict");
2164 Py_DECREF(tag);
2165 if (!value) {
2166 Py_DECREF(key);
2167 return NULL;
2168 }
2169 } else
2170#endif
2171 value = tag; /* use tag as is */
2172
2173 /* add to names dictionary */
2174 if (PyDict_SetItem(self->names, key, value) < 0) {
2175 Py_DECREF(key);
2176 Py_DECREF(value);
2177 return NULL;
2178 }
2179 }
2180
2181 Py_DECREF(key);
2182 return value;
2183}
2184
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002185static void
2186expat_set_error(const char* message, int line, int column)
2187{
2188 PyObject *error;
2189 PyObject *position;
2190 char buffer[256];
2191
2192 sprintf(buffer, "%s: line %d, column %d", message, line, column);
2193
2194 error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
2195 if (!error)
2196 return;
2197
2198 /* add position attribute */
2199 position = Py_BuildValue("(ii)", line, column);
2200 if (!position) {
2201 Py_DECREF(error);
2202 return;
2203 }
2204 if (PyObject_SetAttrString(error, "position", position) == -1) {
2205 Py_DECREF(error);
2206 Py_DECREF(position);
2207 return;
2208 }
2209 Py_DECREF(position);
2210
2211 PyErr_SetObject(elementtree_parseerror_obj, error);
2212 Py_DECREF(error);
2213}
2214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002215/* -------------------------------------------------------------------- */
2216/* handlers */
2217
2218static void
2219expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2220 int data_len)
2221{
2222 PyObject* key;
2223 PyObject* value;
2224 PyObject* res;
2225
2226 if (data_len < 2 || data_in[0] != '&')
2227 return;
2228
2229 key = makestring(data_in + 1, data_len - 2);
2230 if (!key)
2231 return;
2232
2233 value = PyDict_GetItem(self->entity, key);
2234
2235 if (value) {
2236 if (TreeBuilder_CheckExact(self->target))
2237 res = treebuilder_handle_data(
2238 (TreeBuilderObject*) self->target, value
2239 );
2240 else if (self->handle_data)
2241 res = PyObject_CallFunction(self->handle_data, "O", value);
2242 else
2243 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002244 Py_XDECREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002245 } else if (!PyErr_Occurred()) {
2246 /* Report the first error, not the last */
2247 char message[128];
2248 sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key));
2249 expat_set_error(
2250 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002251 EXPAT(GetErrorLineNumber)(self->parser),
2252 EXPAT(GetErrorColumnNumber)(self->parser)
2253 );
2254 }
2255
2256 Py_DECREF(key);
2257}
2258
2259static void
2260expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2261 const XML_Char **attrib_in)
2262{
2263 PyObject* res;
2264 PyObject* tag;
2265 PyObject* attrib;
2266 int ok;
2267
2268 /* tag name */
2269 tag = makeuniversal(self, tag_in);
2270 if (!tag)
2271 return; /* parser will look for errors */
2272
2273 /* attributes */
2274 if (attrib_in[0]) {
2275 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002276 if (!attrib) {
2277 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002278 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002279 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002280 while (attrib_in[0] && attrib_in[1]) {
2281 PyObject* key = makeuniversal(self, attrib_in[0]);
2282 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2283 if (!key || !value) {
2284 Py_XDECREF(value);
2285 Py_XDECREF(key);
2286 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002287 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002288 return;
2289 }
2290 ok = PyDict_SetItem(attrib, key, value);
2291 Py_DECREF(value);
2292 Py_DECREF(key);
2293 if (ok < 0) {
2294 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002295 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002296 return;
2297 }
2298 attrib_in += 2;
2299 }
2300 } else {
2301 Py_INCREF(Py_None);
2302 attrib = Py_None;
2303 }
2304
2305 if (TreeBuilder_CheckExact(self->target))
2306 /* shortcut */
2307 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2308 tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002309 else if (self->handle_start) {
2310 if (attrib == Py_None) {
2311 Py_DECREF(attrib);
2312 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002313 if (!attrib) {
2314 Py_DECREF(tag);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002315 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002316 }
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002317 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002318 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002319 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002320 res = NULL;
2321
2322 Py_DECREF(tag);
2323 Py_DECREF(attrib);
2324
2325 Py_XDECREF(res);
2326}
2327
2328static void
2329expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2330 int data_len)
2331{
2332 PyObject* data;
2333 PyObject* res;
2334
2335 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002336 if (!data)
2337 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002338
2339 if (TreeBuilder_CheckExact(self->target))
2340 /* shortcut */
2341 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2342 else if (self->handle_data)
2343 res = PyObject_CallFunction(self->handle_data, "O", data);
2344 else
2345 res = NULL;
2346
2347 Py_DECREF(data);
2348
2349 Py_XDECREF(res);
2350}
2351
2352static void
2353expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2354{
2355 PyObject* tag;
2356 PyObject* res = NULL;
2357
2358 if (TreeBuilder_CheckExact(self->target))
2359 /* shortcut */
2360 /* the standard tree builder doesn't look at the end tag */
2361 res = treebuilder_handle_end(
2362 (TreeBuilderObject*) self->target, Py_None
2363 );
2364 else if (self->handle_end) {
2365 tag = makeuniversal(self, tag_in);
2366 if (tag) {
2367 res = PyObject_CallFunction(self->handle_end, "O", tag);
2368 Py_DECREF(tag);
2369 }
2370 }
2371
2372 Py_XDECREF(res);
2373}
2374
2375static void
2376expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2377 const XML_Char *uri)
2378{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002379 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2380 PyObject *parcel;
2381 PyObject *sprefix = NULL;
2382 PyObject *suri = NULL;
2383
2384 if (PyErr_Occurred())
2385 return;
2386
2387 if (!target->events || !target->start_ns_event_obj)
2388 return;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002389
Eli Benderskyf933e082013-11-28 06:25:45 -08002390 if (uri)
Eli Bendersky71142c42013-11-28 06:37:25 -08002391 suri = makestring(uri, strlen(uri));
Eli Benderskyf933e082013-11-28 06:25:45 -08002392 else
Eli Bendersky71142c42013-11-28 06:37:25 -08002393 suri = PyString_FromStringAndSize("", 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002394 if (!suri)
2395 return;
2396
2397 if (prefix)
2398 sprefix = makestring(prefix, strlen(prefix));
2399 else
2400 sprefix = PyString_FromStringAndSize("", 0);
2401 if (!sprefix) {
2402 Py_DECREF(suri);
2403 return;
2404 }
2405
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002406 parcel = PyTuple_Pack(2, sprefix, suri);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002407 Py_DECREF(sprefix);
2408 Py_DECREF(suri);
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002409 if (!parcel)
2410 return;
2411 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
2412 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002413}
2414
2415static void
2416expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2417{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002418 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2419
2420 if (PyErr_Occurred())
2421 return;
2422
2423 if (!target->events)
2424 return;
2425
2426 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002427}
2428
2429static void
2430expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2431{
2432 PyObject* comment;
2433 PyObject* res;
2434
2435 if (self->handle_comment) {
2436 comment = makestring(comment_in, strlen(comment_in));
2437 if (comment) {
2438 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2439 Py_XDECREF(res);
2440 Py_DECREF(comment);
2441 }
2442 }
2443}
2444
2445static void
2446expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2447 const XML_Char* data_in)
2448{
2449 PyObject* target;
2450 PyObject* data;
2451 PyObject* res;
2452
2453 if (self->handle_pi) {
2454 target = makestring(target_in, strlen(target_in));
2455 data = makestring(data_in, strlen(data_in));
2456 if (target && data) {
2457 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2458 Py_XDECREF(res);
2459 Py_DECREF(data);
2460 Py_DECREF(target);
2461 } else {
2462 Py_XDECREF(data);
2463 Py_XDECREF(target);
2464 }
2465 }
2466}
2467
2468#if defined(Py_USING_UNICODE)
2469static int
2470expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2471 XML_Encoding *info)
2472{
2473 PyObject* u;
2474 Py_UNICODE* p;
2475 unsigned char s[256];
2476 int i;
2477
2478 memset(info, 0, sizeof(XML_Encoding));
2479
2480 for (i = 0; i < 256; i++)
2481 s[i] = i;
2482
Fredrik Lundhc3389992005-12-25 11:40:19 +00002483 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002484 if (!u)
2485 return XML_STATUS_ERROR;
2486
2487 if (PyUnicode_GET_SIZE(u) != 256) {
2488 Py_DECREF(u);
Eli Benderskyb6717012013-08-04 06:09:49 -07002489 PyErr_SetString(PyExc_ValueError,
2490 "multi-byte encodings are not supported");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002491 return XML_STATUS_ERROR;
2492 }
2493
2494 p = PyUnicode_AS_UNICODE(u);
2495
2496 for (i = 0; i < 256; i++) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002497 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2498 info->map[i] = p[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002499 else
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002500 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002501 }
2502
2503 Py_DECREF(u);
2504
2505 return XML_STATUS_OK;
2506}
2507#endif
2508
2509/* -------------------------------------------------------------------- */
2510/* constructor and destructor */
2511
2512static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00002513xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002514{
2515 XMLParserObject* self;
2516 /* FIXME: does this need to be static? */
2517 static XML_Memory_Handling_Suite memory_handler;
2518
2519 PyObject* target = NULL;
2520 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002521 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002522 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2523 &target, &encoding))
2524 return NULL;
2525
2526#if defined(USE_PYEXPAT_CAPI)
2527 if (!expat_capi) {
2528 PyErr_SetString(
2529 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2530 );
2531 return NULL;
2532 }
2533#endif
2534
2535 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2536 if (self == NULL)
2537 return NULL;
2538
2539 self->entity = PyDict_New();
2540 if (!self->entity) {
2541 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002542 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002543 }
2544
2545 self->names = PyDict_New();
2546 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002547 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002548 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002549 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002550 }
2551
2552 memory_handler.malloc_fcn = PyObject_Malloc;
2553 memory_handler.realloc_fcn = PyObject_Realloc;
2554 memory_handler.free_fcn = PyObject_Free;
2555
2556 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2557 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002558 PyObject_Del(self->names);
2559 PyObject_Del(self->entity);
2560 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002561 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002562 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002563 }
2564
2565 /* setup target handlers */
2566 if (!target) {
2567 target = treebuilder_new();
2568 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002569 EXPAT(ParserFree)(self->parser);
2570 PyObject_Del(self->names);
2571 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002572 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002573 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002574 }
2575 } else
2576 Py_INCREF(target);
2577 self->target = target;
2578
2579 self->handle_xml = PyObject_GetAttrString(target, "xml");
2580 self->handle_start = PyObject_GetAttrString(target, "start");
2581 self->handle_data = PyObject_GetAttrString(target, "data");
2582 self->handle_end = PyObject_GetAttrString(target, "end");
2583 self->handle_comment = PyObject_GetAttrString(target, "comment");
2584 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002585 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002586
2587 PyErr_Clear();
2588
2589 /* configure parser */
2590 EXPAT(SetUserData)(self->parser, self);
2591 EXPAT(SetElementHandler)(
2592 self->parser,
2593 (XML_StartElementHandler) expat_start_handler,
2594 (XML_EndElementHandler) expat_end_handler
2595 );
2596 EXPAT(SetDefaultHandlerExpand)(
2597 self->parser,
2598 (XML_DefaultHandler) expat_default_handler
2599 );
2600 EXPAT(SetCharacterDataHandler)(
2601 self->parser,
2602 (XML_CharacterDataHandler) expat_data_handler
2603 );
2604 if (self->handle_comment)
2605 EXPAT(SetCommentHandler)(
2606 self->parser,
2607 (XML_CommentHandler) expat_comment_handler
2608 );
2609 if (self->handle_pi)
2610 EXPAT(SetProcessingInstructionHandler)(
2611 self->parser,
2612 (XML_ProcessingInstructionHandler) expat_pi_handler
2613 );
2614#if defined(Py_USING_UNICODE)
2615 EXPAT(SetUnknownEncodingHandler)(
2616 self->parser,
2617 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2618 );
2619#endif
2620
2621 ALLOC(sizeof(XMLParserObject), "create expatparser");
2622
2623 return (PyObject*) self;
2624}
2625
2626static void
2627xmlparser_dealloc(XMLParserObject* self)
2628{
2629 EXPAT(ParserFree)(self->parser);
2630
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002631 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002632 Py_XDECREF(self->handle_pi);
2633 Py_XDECREF(self->handle_comment);
2634 Py_XDECREF(self->handle_end);
2635 Py_XDECREF(self->handle_data);
2636 Py_XDECREF(self->handle_start);
2637 Py_XDECREF(self->handle_xml);
2638
2639 Py_DECREF(self->target);
2640 Py_DECREF(self->entity);
2641 Py_DECREF(self->names);
2642
2643 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2644
2645 PyObject_Del(self);
2646}
2647
2648/* -------------------------------------------------------------------- */
2649/* methods (in alphabetical order) */
2650
2651LOCAL(PyObject*)
2652expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2653{
2654 int ok;
2655
2656 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2657
2658 if (PyErr_Occurred())
2659 return NULL;
2660
2661 if (!ok) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002662 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2664 EXPAT(GetErrorLineNumber)(self->parser),
2665 EXPAT(GetErrorColumnNumber)(self->parser)
2666 );
2667 return NULL;
2668 }
2669
2670 Py_RETURN_NONE;
2671}
2672
2673static PyObject*
2674xmlparser_close(XMLParserObject* self, PyObject* args)
2675{
2676 /* end feeding data to parser */
2677
2678 PyObject* res;
2679 if (!PyArg_ParseTuple(args, ":close"))
2680 return NULL;
2681
2682 res = expat_parse(self, "", 0, 1);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002683 if (!res)
2684 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002685
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002686 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002687 Py_DECREF(res);
2688 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002689 } if (self->handle_close) {
2690 Py_DECREF(res);
2691 return PyObject_CallFunction(self->handle_close, "");
2692 } else
2693 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002694}
2695
2696static PyObject*
2697xmlparser_feed(XMLParserObject* self, PyObject* args)
2698{
2699 /* feed data to parser */
2700
2701 char* data;
2702 int data_len;
2703 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2704 return NULL;
2705
2706 return expat_parse(self, data, data_len, 0);
2707}
2708
2709static PyObject*
2710xmlparser_parse(XMLParserObject* self, PyObject* args)
2711{
2712 /* (internal) parse until end of input stream */
2713
2714 PyObject* reader;
2715 PyObject* buffer;
2716 PyObject* res;
2717
2718 PyObject* fileobj;
2719 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2720 return NULL;
2721
2722 reader = PyObject_GetAttrString(fileobj, "read");
2723 if (!reader)
2724 return NULL;
2725
2726 /* read from open file object */
2727 for (;;) {
2728
2729 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2730
2731 if (!buffer) {
2732 /* read failed (e.g. due to KeyboardInterrupt) */
2733 Py_DECREF(reader);
2734 return NULL;
2735 }
2736
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002737 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738 Py_DECREF(buffer);
2739 break;
2740 }
2741
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002742 if (PyString_GET_SIZE(buffer) > INT_MAX) {
2743 Py_DECREF(buffer);
2744 Py_DECREF(reader);
2745 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
2746 return NULL;
2747 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002748 res = expat_parse(
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002749 self, PyString_AS_STRING(buffer), (int)PyString_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002750 );
2751
2752 Py_DECREF(buffer);
2753
2754 if (!res) {
2755 Py_DECREF(reader);
2756 return NULL;
2757 }
2758 Py_DECREF(res);
2759
2760 }
2761
2762 Py_DECREF(reader);
2763
2764 res = expat_parse(self, "", 0, 1);
2765
2766 if (res && TreeBuilder_CheckExact(self->target)) {
2767 Py_DECREF(res);
2768 return treebuilder_done((TreeBuilderObject*) self->target);
2769 }
2770
2771 return res;
2772}
2773
2774static PyObject*
2775xmlparser_setevents(XMLParserObject* self, PyObject* args)
2776{
2777 /* activate element event reporting */
2778
Neal Norwitzc7074382006-06-12 02:06:17 +00002779 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002780 TreeBuilderObject* target;
2781
2782 PyObject* events; /* event collector */
2783 PyObject* event_set = Py_None;
2784 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2785 &event_set))
2786 return NULL;
2787
2788 if (!TreeBuilder_CheckExact(self->target)) {
2789 PyErr_SetString(
2790 PyExc_TypeError,
2791 "event handling only supported for cElementTree.Treebuilder "
2792 "targets"
2793 );
2794 return NULL;
2795 }
2796
2797 target = (TreeBuilderObject*) self->target;
2798
2799 Py_INCREF(events);
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002800 Py_XSETREF(target->events, events);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002801
2802 /* clear out existing events */
Serhiy Storchaka98a97222014-02-09 13:14:04 +02002803 Py_CLEAR(target->start_event_obj);
2804 Py_CLEAR(target->end_event_obj);
2805 Py_CLEAR(target->start_ns_event_obj);
2806 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002807
2808 if (event_set == Py_None) {
2809 /* default is "end" only */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002810 target->end_event_obj = PyString_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002811 Py_RETURN_NONE;
2812 }
2813
2814 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2815 goto error;
2816
2817 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2818 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2819 char* event;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002820 if (!PyString_Check(item))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002821 goto error;
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002822 Py_INCREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002823 event = PyString_AS_STRING(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002824 if (strcmp(event, "start") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002825 Py_XSETREF(target->start_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002826 } else if (strcmp(event, "end") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002827 Py_XSETREF(target->end_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002828 } else if (strcmp(event, "start-ns") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002829 Py_XSETREF(target->start_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002830 EXPAT(SetNamespaceDeclHandler)(
2831 self->parser,
2832 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2833 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2834 );
2835 } else if (strcmp(event, "end-ns") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002836 Py_XSETREF(target->end_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002837 EXPAT(SetNamespaceDeclHandler)(
2838 self->parser,
2839 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2840 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2841 );
2842 } else {
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002843 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002844 PyErr_Format(
2845 PyExc_ValueError,
2846 "unknown event '%s'", event
2847 );
2848 return NULL;
2849 }
2850 }
2851
2852 Py_RETURN_NONE;
2853
2854 error:
2855 PyErr_SetString(
2856 PyExc_TypeError,
2857 "invalid event tuple"
2858 );
2859 return NULL;
2860}
2861
2862static PyMethodDef xmlparser_methods[] = {
2863 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2864 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2865 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2866 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2867 {NULL, NULL}
2868};
2869
2870static PyObject*
2871xmlparser_getattr(XMLParserObject* self, char* name)
2872{
2873 PyObject* res;
2874
2875 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2876 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002877 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002878
2879 PyErr_Clear();
2880
2881 if (strcmp(name, "entity") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002882 res = self->entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002883 else if (strcmp(name, "target") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002884 res = self->target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002885 else if (strcmp(name, "version") == 0) {
2886 char buffer[100];
2887 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2888 XML_MINOR_VERSION, XML_MICRO_VERSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002889 return PyString_FromString(buffer);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002890 } else {
2891 PyErr_SetString(PyExc_AttributeError, name);
2892 return NULL;
2893 }
2894
2895 Py_INCREF(res);
2896 return res;
2897}
2898
2899statichere PyTypeObject XMLParser_Type = {
2900 PyObject_HEAD_INIT(NULL)
2901 0, "XMLParser", sizeof(XMLParserObject), 0,
2902 /* methods */
2903 (destructor)xmlparser_dealloc, /* tp_dealloc */
2904 0, /* tp_print */
2905 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2906};
2907
2908#endif
2909
2910/* ==================================================================== */
2911/* python module interface */
2912
2913static PyMethodDef _functions[] = {
2914 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2915 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2916 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2917#if defined(USE_EXPAT)
2918 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2919 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2920#endif
2921 {NULL, NULL}
2922};
2923
2924DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002925init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002926{
2927 PyObject* m;
2928 PyObject* g;
2929 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002930
2931 /* Patch object type */
Christian Heimese93237d2007-12-19 02:37:44 +00002932 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002933#if defined(USE_EXPAT)
Christian Heimese93237d2007-12-19 02:37:44 +00002934 Py_TYPE(&XMLParser_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002935#endif
2936
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002937 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002938 if (!m)
2939 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002940
2941 /* python glue code */
2942
2943 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002944 if (!g)
2945 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002946
2947 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2948
2949 bootstrap = (
2950
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002951 "from copy import copy, deepcopy\n"
2952
2953 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002954 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002955 "except ImportError:\n"
2956 " import ElementTree\n"
2957 "ET = ElementTree\n"
2958 "del ElementTree\n"
2959
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002960 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002961
2962 "try:\n" /* check if copy works as is */
2963 " copy(cElementTree.Element('x'))\n"
2964 "except:\n"
2965 " def copyelement(elem):\n"
2966 " return elem\n"
2967
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002968 "class CommentProxy:\n"
2969 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970 " element = cElementTree.Element(ET.Comment)\n"
2971 " element.text = text\n"
2972 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002973 " def __cmp__(self, other):\n"
2974 " return cmp(ET.Comment, other)\n"
2975 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976
2977 "class ElementTree(ET.ElementTree):\n" /* public */
2978 " def parse(self, source, parser=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002979 " close_source = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002980 " if not hasattr(source, 'read'):\n"
2981 " source = open(source, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002982 " close_source = False\n"
2983 " try:\n"
2984 " if parser is not None:\n"
2985 " while 1:\n"
2986 " data = source.read(65536)\n"
2987 " if not data:\n"
2988 " break\n"
2989 " parser.feed(data)\n"
2990 " self._root = parser.close()\n"
2991 " else:\n"
2992 " parser = cElementTree.XMLParser()\n"
2993 " self._root = parser._parse(source)\n"
2994 " return self._root\n"
2995 " finally:\n"
2996 " if close_source:\n"
2997 " source.close()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002998 "cElementTree.ElementTree = ElementTree\n"
2999
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003000 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003001 " if tag == '*':\n"
3002 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003003 " if tag is None or node.tag == tag:\n"
3004 " yield node\n"
3005 " for node in node:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003006 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003007 " yield node\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003008
3009 "def itertext(node):\n" /* helper */
3010 " if node.text:\n"
3011 " yield node.text\n"
3012 " for e in node:\n"
3013 " for s in e.itertext():\n"
3014 " yield s\n"
3015 " if e.tail:\n"
3016 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003017
3018 "def parse(source, parser=None):\n" /* public */
3019 " tree = ElementTree()\n"
3020 " tree.parse(source, parser)\n"
3021 " return tree\n"
3022 "cElementTree.parse = parse\n"
3023
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003024 "class iterparse(object):\n"
3025 " root = None\n"
3026 " def __init__(self, file, events=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02003027 " self._close_file = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003028 " if not hasattr(file, 'read'):\n"
3029 " file = open(file, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02003030 " self._close_file = True\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003031 " self._file = file\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003032 " self._events = []\n"
3033 " self._index = 0\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003034 " self._error = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003035 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003036 " b = cElementTree.TreeBuilder()\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003037 " self._parser = cElementTree.XMLParser(b)\n"
3038 " self._parser._setevents(self._events, events)\n"
3039 " def next(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003040 " while 1:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003041 " try:\n"
3042 " item = self._events[self._index]\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003043 " self._index += 1\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003044 " return item\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003045 " except IndexError:\n"
3046 " pass\n"
3047 " if self._error:\n"
3048 " e = self._error\n"
3049 " self._error = None\n"
3050 " raise e\n"
3051 " if self._parser is None:\n"
3052 " self.root = self._root\n"
3053 " if self._close_file:\n"
3054 " self._file.close()\n"
3055 " raise StopIteration\n"
3056 " # load event buffer\n"
3057 " del self._events[:]\n"
3058 " self._index = 0\n"
3059 " data = self._file.read(16384)\n"
3060 " if data:\n"
3061 " try:\n"
3062 " self._parser.feed(data)\n"
3063 " except SyntaxError as exc:\n"
3064 " self._error = exc\n"
3065 " else:\n"
3066 " self._root = self._parser.close()\n"
3067 " self._parser = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003068 " def __iter__(self):\n"
3069 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003070 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003071
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003072 "class PIProxy:\n"
3073 " def __call__(self, target, text=None):\n"
3074 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003075 " element.text = target\n"
3076 " if text:\n"
3077 " element.text = element.text + ' ' + text\n"
3078 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003079 " def __cmp__(self, other):\n"
3080 " return cmp(ET.PI, other)\n"
3081 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082
3083 "def XML(text):\n" /* public */
3084 " parser = cElementTree.XMLParser()\n"
3085 " parser.feed(text)\n"
3086 " return parser.close()\n"
3087 "cElementTree.XML = cElementTree.fromstring = XML\n"
3088
3089 "def XMLID(text):\n" /* public */
3090 " tree = XML(text)\n"
3091 " ids = {}\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003092 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003093 " id = elem.get('id')\n"
3094 " if id:\n"
3095 " ids[id] = elem\n"
3096 " return tree, ids\n"
3097 "cElementTree.XMLID = XMLID\n"
3098
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003099 "try:\n"
3100 " register_namespace = ET.register_namespace\n"
3101 "except AttributeError:\n"
3102 " def register_namespace(prefix, uri):\n"
3103 " ET._namespace_map[uri] = prefix\n"
3104 "cElementTree.register_namespace = register_namespace\n"
3105
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003106 "cElementTree.dump = ET.dump\n"
3107 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3108 "cElementTree.iselement = ET.iselement\n"
3109 "cElementTree.QName = ET.QName\n"
3110 "cElementTree.tostring = ET.tostring\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003111 "cElementTree.fromstringlist = ET.fromstringlist\n"
3112 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003113 "cElementTree.VERSION = '" VERSION "'\n"
3114 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003115
3116 );
3117
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003118 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3119 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003120
3121 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3122
3123 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3124 if (elementtree_copyelement_obj) {
3125 /* reduce hack needed; enable reduce method */
3126 PyMethodDef* mp;
3127 for (mp = element_methods; mp->ml_name; mp++)
3128 if (mp->ml_meth == (PyCFunction) element_reduce) {
3129 mp->ml_name = "__reduce__";
3130 break;
3131 }
3132 } else
3133 PyErr_Clear();
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003134
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003135 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003136 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3137 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003138
3139#if defined(USE_PYEXPAT_CAPI)
3140 /* link against pyexpat, if possible */
Larry Hastings402b73f2010-03-25 00:54:54 +00003141 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003142 if (expat_capi) {
3143 /* check that it's usable */
3144 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3145 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3146 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3147 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3148 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3149 expat_capi = NULL;
3150 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003151#endif
3152
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003153 elementtree_parseerror_obj = PyErr_NewException(
3154 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3155 );
3156 Py_INCREF(elementtree_parseerror_obj);
3157 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003158}