blob: 574559c6313bc73e83de6e8798c35f689cc5ca43 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xicluna3e8c1892010-03-11 14:36:19 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xicluna3e8c1892010-03-11 14:36:19 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xicluna3e8c1892010-03-11 14:36:19 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xicluna3e8c1892010-03-11 14:36:19 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Keep this defined to include the expat-based XMLParser type. */
#define USE_EXPAT

/* Define this to do all expat calls via pyexpat's embedded expat
   library. */
/* #define USE_PYEXPAT_CAPI */

/* Number of children an element can hold without extra memory
   allocations.

   For best performance, choose a value so that 80-90% of all nodes
   have no more than this number of children.  Set it to zero to
   minimize the size of the element structure itself (this only helps
   if you have lots of leaf nodes with attributes).

   Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current version of cElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
#define STATIC_CHILDREN 4
76
77/* -------------------------------------------------------------------- */
78
/* Allocation tracing hooks.  With the "#if 0" branch enabled they keep
   a running byte total and print it; in the default configuration both
   macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) declares a file-local function */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
96
/* compatibility macros */

/* before 2.6: provide the Py_REFCNT / Py_TYPE accessors */
#if (PY_VERSION_HEX < 0x02060000)
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* before 2.5: no Py_ssize_t or lenfunc */
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

/* before 2.4: no exact dict check; Py_RETURN_NONE may be missing */
#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check

#if !defined(Py_RETURN_NONE)
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000115
/* The 'join' flag lives in the lowest bit of the text/tail object
   pointers.  Every use of text or tail as an object pointer must go
   through JOIN_OBJ; see the comments in the ElementObject definition
   for more info.

   JOIN_OBJ(p)        the object pointer with the flag bit cleared
   JOIN_GET(p)        the flag bit (0 or 1)
   JOIN_SET(p, flag)  the object pointer of p combined with flag */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
123
124/* glue functions (see the init function for details) */
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000125static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000128static PyObject* elementtree_iter_obj;
129static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130static PyObject* elementpath_obj;
131
132/* helpers */
133
Oren Milmanf15058a2017-10-11 16:29:12 +0300134/* Py_SETREF for a PyObject* that uses a join flag. */
135Py_LOCAL_INLINE(void)
136_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
137{
138 PyObject *tmp = JOIN_OBJ(*p);
139 *p = new_joined_ptr;
140 Py_DECREF(tmp);
141}
142
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143LOCAL(PyObject*)
144deepcopy(PyObject* object, PyObject* memo)
145{
146 /* do a deep copy of the given object */
147
148 PyObject* args;
149 PyObject* result;
150
151 if (!elementtree_deepcopy_obj) {
152 PyErr_SetString(
153 PyExc_RuntimeError,
154 "deepcopy helper not found"
155 );
156 return NULL;
157 }
158
159 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000160 if (!args)
161 return NULL;
162
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000163 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
164 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
165
166 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
167
168 Py_DECREF(args);
169
170 return result;
171}
172
173LOCAL(PyObject*)
174list_join(PyObject* list)
175{
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300176 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000177 PyObject* joiner;
178 PyObject* function;
179 PyObject* args;
180 PyObject* result;
181
182 switch (PyList_GET_SIZE(list)) {
183 case 0:
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000184 return PyString_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000185 case 1:
186 result = PyList_GET_ITEM(list, 0);
187 Py_INCREF(result);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000188 return result;
189 }
190
191 /* two or more elements: slice out a suitable separator from the
192 first member, and use that to join the entire list */
193
194 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
195 if (!joiner)
196 return NULL;
197
198 function = PyObject_GetAttrString(joiner, "join");
199 if (!function) {
200 Py_DECREF(joiner);
201 return NULL;
202 }
203
204 args = PyTuple_New(1);
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300205 if (!args) {
206 Py_DECREF(function);
207 Py_DECREF(joiner);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000208 return NULL;
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300209 }
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000210
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300211 Py_INCREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212 PyTuple_SET_ITEM(args, 0, list);
213
214 result = PyObject_CallObject(function, args);
215
216 Py_DECREF(args); /* also removes list */
217 Py_DECREF(function);
218 Py_DECREF(joiner);
219
220 return result;
221}
222
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000223/* -------------------------------------------------------------------- */
224/* the element type */
225
226typedef struct {
227
228 /* attributes (a dictionary object), or None if no attributes */
229 PyObject* attrib;
230
231 /* child elements */
232 int length; /* actual number of items */
233 int allocated; /* allocated items */
234
235 /* this either points to _children or to a malloced buffer */
236 PyObject* *children;
237
238 PyObject* _children[STATIC_CHILDREN];
239
240} ElementObjectExtra;
241
242typedef struct {
243 PyObject_HEAD
244
245 /* element tag (a string). */
246 PyObject* tag;
247
248 /* text before first child. note that this is a tagged pointer;
249 use JOIN_OBJ to get the object pointer. the join flag is used
250 to distinguish lists created by the tree builder from lists
251 assigned to the attribute by application code; the former
252 should be joined before being returned to the user, the latter
253 should be left intact. */
254 PyObject* text;
255
256 /* text after this element, in parent. note that this is a tagged
257 pointer; use JOIN_OBJ to get the object pointer. */
258 PyObject* tail;
259
260 ElementObjectExtra* extra;
261
262} ElementObject;
263
264staticforward PyTypeObject Element_Type;
265
Christian Heimese93237d2007-12-19 02:37:44 +0000266#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000267
268/* -------------------------------------------------------------------- */
269/* element constructor and destructor */
270
271LOCAL(int)
272element_new_extra(ElementObject* self, PyObject* attrib)
273{
274 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
275 if (!self->extra)
276 return -1;
277
278 if (!attrib)
279 attrib = Py_None;
280
281 Py_INCREF(attrib);
282 self->extra->attrib = attrib;
283
284 self->extra->length = 0;
285 self->extra->allocated = STATIC_CHILDREN;
286 self->extra->children = self->extra->_children;
287
288 return 0;
289}
290
291LOCAL(void)
292element_dealloc_extra(ElementObject* self)
293{
294 int i;
295
296 Py_DECREF(self->extra->attrib);
297
298 for (i = 0; i < self->extra->length; i++)
299 Py_DECREF(self->extra->children[i]);
300
301 if (self->extra->children != self->extra->_children)
302 PyObject_Free(self->extra->children);
303
304 PyObject_Free(self->extra);
305}
306
307LOCAL(PyObject*)
308element_new(PyObject* tag, PyObject* attrib)
309{
310 ElementObject* self;
311
312 self = PyObject_New(ElementObject, &Element_Type);
313 if (self == NULL)
314 return NULL;
315
316 /* use None for empty dictionaries */
317 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
318 attrib = Py_None;
319
320 self->extra = NULL;
321
322 if (attrib != Py_None) {
323
Neal Norwitzc6a989a2006-05-10 06:57:58 +0000324 if (element_new_extra(self, attrib) < 0) {
325 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000326 return NULL;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000327 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000328
329 self->extra->length = 0;
330 self->extra->allocated = STATIC_CHILDREN;
331 self->extra->children = self->extra->_children;
332
333 }
334
335 Py_INCREF(tag);
336 self->tag = tag;
337
338 Py_INCREF(Py_None);
339 self->text = Py_None;
340
341 Py_INCREF(Py_None);
342 self->tail = Py_None;
343
344 ALLOC(sizeof(ElementObject), "create element");
345
346 return (PyObject*) self;
347}
348
349LOCAL(int)
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200350element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000351{
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200352 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000353 PyObject* *children;
354
355 /* make sure self->children can hold the given number of extra
356 elements. set an exception and return -1 if allocation failed */
357
358 if (!self->extra)
359 element_new_extra(self, NULL);
360
361 size = self->extra->length + extra;
362
363 if (size > self->extra->allocated) {
364 /* use Python 2.4's list growth strategy */
365 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000366 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
367 * which needs at least 4 bytes.
368 * Although it's a false alarm always assume at least one child to
369 * be safe.
370 */
371 size = size ? size : 1;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200372 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
373 goto nomemory;
374 if (size > INT_MAX) {
375 PyErr_SetString(PyExc_OverflowError,
376 "too many children");
377 return -1;
378 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000379 if (self->extra->children != self->extra->_children) {
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000380 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
381 * "children", which needs at least 4 bytes. Although it's a
382 * false alarm always assume at least one child to be safe.
383 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000384 children = PyObject_Realloc(self->extra->children,
385 size * sizeof(PyObject*));
386 if (!children)
387 goto nomemory;
388 } else {
389 children = PyObject_Malloc(size * sizeof(PyObject*));
390 if (!children)
391 goto nomemory;
392 /* copy existing children from static area to malloc buffer */
393 memcpy(children, self->extra->children,
394 self->extra->length * sizeof(PyObject*));
395 }
396 self->extra->children = children;
397 self->extra->allocated = size;
398 }
399
400 return 0;
401
402 nomemory:
403 PyErr_NoMemory();
404 return -1;
405}
406
407LOCAL(int)
408element_add_subelement(ElementObject* self, PyObject* element)
409{
410 /* add a child element to a parent */
411
412 if (element_resize(self, 1) < 0)
413 return -1;
414
415 Py_INCREF(element);
416 self->extra->children[self->extra->length] = element;
417
418 self->extra->length++;
419
420 return 0;
421}
422
423LOCAL(PyObject*)
424element_get_attrib(ElementObject* self)
425{
426 /* return borrowed reference to attrib dictionary */
427 /* note: this function assumes that the extra section exists */
428
429 PyObject* res = self->extra->attrib;
430
431 if (res == Py_None) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000432 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000433 /* create missing dictionary */
434 res = PyDict_New();
435 if (!res)
436 return NULL;
437 self->extra->attrib = res;
438 }
439
440 return res;
441}
442
443LOCAL(PyObject*)
444element_get_text(ElementObject* self)
445{
446 /* return borrowed reference to text attribute */
447
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300448 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000449
450 if (JOIN_GET(res)) {
451 res = JOIN_OBJ(res);
452 if (PyList_CheckExact(res)) {
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300453 PyObject *tmp = list_join(res);
454 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000455 return NULL;
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300456 self->text = tmp;
457 Py_DECREF(res);
458 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000459 }
460 }
461
462 return res;
463}
464
465LOCAL(PyObject*)
466element_get_tail(ElementObject* self)
467{
468 /* return borrowed reference to text attribute */
469
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300470 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000471
472 if (JOIN_GET(res)) {
473 res = JOIN_OBJ(res);
474 if (PyList_CheckExact(res)) {
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300475 PyObject *tmp = list_join(res);
476 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000477 return NULL;
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300478 self->tail = tmp;
479 Py_DECREF(res);
480 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000481 }
482 }
483
484 return res;
485}
486
487static PyObject*
488element(PyObject* self, PyObject* args, PyObject* kw)
489{
490 PyObject* elem;
491
492 PyObject* tag;
493 PyObject* attrib = NULL;
494 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
495 &PyDict_Type, &attrib))
496 return NULL;
497
498 if (attrib || kw) {
499 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
500 if (!attrib)
501 return NULL;
502 if (kw)
503 PyDict_Update(attrib, kw);
504 } else {
505 Py_INCREF(Py_None);
506 attrib = Py_None;
507 }
508
509 elem = element_new(tag, attrib);
510
511 Py_DECREF(attrib);
512
513 return elem;
514}
515
516static PyObject*
517subelement(PyObject* self, PyObject* args, PyObject* kw)
518{
519 PyObject* elem;
520
521 ElementObject* parent;
522 PyObject* tag;
523 PyObject* attrib = NULL;
524 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
525 &Element_Type, &parent, &tag,
526 &PyDict_Type, &attrib))
527 return NULL;
528
529 if (attrib || kw) {
530 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
531 if (!attrib)
532 return NULL;
533 if (kw)
534 PyDict_Update(attrib, kw);
535 } else {
536 Py_INCREF(Py_None);
537 attrib = Py_None;
538 }
539
540 elem = element_new(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000541 Py_DECREF(attrib);
Xiang Zhang9c0408d2017-03-22 14:32:52 +0800542 if (elem == NULL)
543 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000544
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000545 if (element_add_subelement(parent, elem) < 0) {
546 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000547 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000548 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000549
550 return elem;
551}
552
553static void
554element_dealloc(ElementObject* self)
555{
Serhiy Storchaka14518742016-12-28 09:23:17 +0200556 if (self->extra)
557 element_dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000558
559 /* discard attributes */
560 Py_DECREF(self->tag);
Serhiy Storchaka14518742016-12-28 09:23:17 +0200561 Py_DECREF(JOIN_OBJ(self->text));
562 Py_DECREF(JOIN_OBJ(self->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563
564 RELEASE(sizeof(ElementObject), "destroy element");
565
566 PyObject_Del(self);
567}
568
569/* -------------------------------------------------------------------- */
570/* methods (in alphabetical order) */
571
572static PyObject*
573element_append(ElementObject* self, PyObject* args)
574{
575 PyObject* element;
576 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
577 return NULL;
578
579 if (element_add_subelement(self, element) < 0)
580 return NULL;
581
582 Py_RETURN_NONE;
583}
584
585static PyObject*
586element_clear(ElementObject* self, PyObject* args)
587{
588 if (!PyArg_ParseTuple(args, ":clear"))
589 return NULL;
590
591 if (self->extra) {
592 element_dealloc_extra(self);
593 self->extra = NULL;
594 }
595
596 Py_INCREF(Py_None);
Oren Milmanf15058a2017-10-11 16:29:12 +0300597 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000598
599 Py_INCREF(Py_None);
Oren Milmanf15058a2017-10-11 16:29:12 +0300600 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000601
602 Py_RETURN_NONE;
603}
604
605static PyObject*
606element_copy(ElementObject* self, PyObject* args)
607{
608 int i;
609 ElementObject* element;
610
611 if (!PyArg_ParseTuple(args, ":__copy__"))
612 return NULL;
613
614 element = (ElementObject*) element_new(
615 self->tag, (self->extra) ? self->extra->attrib : Py_None
616 );
617 if (!element)
618 return NULL;
619
Oren Milmanf15058a2017-10-11 16:29:12 +0300620 Py_INCREF(JOIN_OBJ(self->text));
621 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000622
Oren Milmanf15058a2017-10-11 16:29:12 +0300623 Py_INCREF(JOIN_OBJ(self->tail));
624 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625
626 if (self->extra) {
627
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000628 if (element_resize(element, self->extra->length) < 0) {
629 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000630 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000631 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000632
633 for (i = 0; i < self->extra->length; i++) {
634 Py_INCREF(self->extra->children[i]);
635 element->extra->children[i] = self->extra->children[i];
636 }
637
638 element->extra->length = self->extra->length;
639
640 }
641
642 return (PyObject*) element;
643}
644
645static PyObject*
646element_deepcopy(ElementObject* self, PyObject* args)
647{
648 int i;
649 ElementObject* element;
650 PyObject* tag;
651 PyObject* attrib;
652 PyObject* text;
653 PyObject* tail;
654 PyObject* id;
655
656 PyObject* memo;
657 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
658 return NULL;
659
660 tag = deepcopy(self->tag, memo);
661 if (!tag)
662 return NULL;
663
664 if (self->extra) {
665 attrib = deepcopy(self->extra->attrib, memo);
666 if (!attrib) {
667 Py_DECREF(tag);
668 return NULL;
669 }
670 } else {
671 Py_INCREF(Py_None);
672 attrib = Py_None;
673 }
674
675 element = (ElementObject*) element_new(tag, attrib);
676
677 Py_DECREF(tag);
678 Py_DECREF(attrib);
679
680 if (!element)
681 return NULL;
682
683 text = deepcopy(JOIN_OBJ(self->text), memo);
684 if (!text)
685 goto error;
Oren Milmanf15058a2017-10-11 16:29:12 +0300686 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000687
688 tail = deepcopy(JOIN_OBJ(self->tail), memo);
689 if (!tail)
690 goto error;
Oren Milmanf15058a2017-10-11 16:29:12 +0300691 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000692
693 if (self->extra) {
694
695 if (element_resize(element, self->extra->length) < 0)
696 goto error;
697
698 for (i = 0; i < self->extra->length; i++) {
699 PyObject* child = deepcopy(self->extra->children[i], memo);
700 if (!child) {
701 element->extra->length = i;
702 goto error;
703 }
704 element->extra->children[i] = child;
705 }
706
707 element->extra->length = self->extra->length;
708
709 }
710
711 /* add object to memo dictionary (so deepcopy won't visit it again) */
712 id = PyInt_FromLong((Py_uintptr_t) self);
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000713 if (!id)
714 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000715
716 i = PyDict_SetItem(memo, id, (PyObject*) element);
717
718 Py_DECREF(id);
719
720 if (i < 0)
721 goto error;
722
723 return (PyObject*) element;
724
725 error:
726 Py_DECREF(element);
727 return NULL;
728}
729
730LOCAL(int)
731checkpath(PyObject* tag)
732{
Neal Norwitzc7074382006-06-12 02:06:17 +0000733 Py_ssize_t i;
734 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000735
736 /* check if a tag contains an xpath character */
737
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000738#define PATHCHAR(ch) \
739 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000740
741#if defined(Py_USING_UNICODE)
742 if (PyUnicode_Check(tag)) {
743 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
744 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
745 if (p[i] == '{')
746 check = 0;
747 else if (p[i] == '}')
748 check = 1;
749 else if (check && PATHCHAR(p[i]))
750 return 1;
751 }
752 return 0;
753 }
754#endif
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000755 if (PyString_Check(tag)) {
756 char *p = PyString_AS_STRING(tag);
757 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000758 if (p[i] == '{')
759 check = 0;
760 else if (p[i] == '}')
761 check = 1;
762 else if (check && PATHCHAR(p[i]))
763 return 1;
764 }
765 return 0;
766 }
767
768 return 1; /* unknown type; might be path expression */
769}
770
771static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000772element_extend(ElementObject* self, PyObject* args)
773{
774 PyObject* seq;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300775 Py_ssize_t i;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000776
777 PyObject* seq_in;
778 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
779 return NULL;
780
781 seq = PySequence_Fast(seq_in, "");
782 if (!seq) {
783 PyErr_Format(
784 PyExc_TypeError,
785 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
786 );
787 return NULL;
788 }
789
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300790 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000791 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
792 if (element_add_subelement(self, element) < 0) {
793 Py_DECREF(seq);
794 return NULL;
795 }
796 }
797
798 Py_DECREF(seq);
799
800 Py_RETURN_NONE;
801}
802
803static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000804element_find(ElementObject* self, PyObject* args)
805{
806 int i;
807
808 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000809 PyObject* namespaces = Py_None;
810 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000811 return NULL;
812
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000813 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000814 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000815 elementpath_obj, "find", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000816 );
817
818 if (!self->extra)
819 Py_RETURN_NONE;
820
821 for (i = 0; i < self->extra->length; i++) {
822 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300823 int rc;
824 if (!Element_CheckExact(item))
825 continue;
826 Py_INCREF(item);
827 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
828 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000829 return item;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300830 Py_DECREF(item);
831 if (rc < 0 && PyErr_Occurred())
832 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000833 }
834
835 Py_RETURN_NONE;
836}
837
838static PyObject*
839element_findtext(ElementObject* self, PyObject* args)
840{
841 int i;
842
843 PyObject* tag;
844 PyObject* default_value = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000845 PyObject* namespaces = Py_None;
846 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000847 return NULL;
848
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000849 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000850 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000851 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000852 );
853
854 if (!self->extra) {
855 Py_INCREF(default_value);
856 return default_value;
857 }
858
859 for (i = 0; i < self->extra->length; i++) {
860 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300861 int rc;
862 if (!Element_CheckExact(item))
863 continue;
864 Py_INCREF(item);
865 rc = PyObject_Compare(item->tag, tag);
866 if (rc == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000867 PyObject* text = element_get_text(item);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300868 if (text == Py_None) {
869 Py_DECREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000870 return PyString_FromString("");
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300871 }
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000872 Py_XINCREF(text);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300873 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000874 return text;
875 }
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300876 Py_DECREF(item);
877 if (rc < 0 && PyErr_Occurred())
878 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000879 }
880
881 Py_INCREF(default_value);
882 return default_value;
883}
884
885static PyObject*
886element_findall(ElementObject* self, PyObject* args)
887{
888 int i;
889 PyObject* out;
890
891 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000892 PyObject* namespaces = Py_None;
893 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000894 return NULL;
895
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000896 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000897 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000898 elementpath_obj, "findall", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000899 );
900
901 out = PyList_New(0);
902 if (!out)
903 return NULL;
904
905 if (!self->extra)
906 return out;
907
908 for (i = 0; i < self->extra->length; i++) {
909 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300910 int rc;
911 if (!Element_CheckExact(item))
912 continue;
913 Py_INCREF(item);
914 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
915 if (rc == 0)
916 rc = PyList_Append(out, item);
917 Py_DECREF(item);
918 if (rc < 0 && PyErr_Occurred()) {
919 Py_DECREF(out);
920 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000921 }
922 }
923
924 return out;
925}
926
927static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000928element_iterfind(ElementObject* self, PyObject* args)
929{
930 PyObject* tag;
931 PyObject* namespaces = Py_None;
932 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
933 return NULL;
934
935 return PyObject_CallMethod(
936 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
937 );
938}
939
940static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000941element_get(ElementObject* self, PyObject* args)
942{
943 PyObject* value;
944
945 PyObject* key;
946 PyObject* default_value = Py_None;
947 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
948 return NULL;
949
950 if (!self->extra || self->extra->attrib == Py_None)
951 value = default_value;
952 else {
953 value = PyDict_GetItem(self->extra->attrib, key);
954 if (!value)
955 value = default_value;
956 }
957
958 Py_INCREF(value);
959 return value;
960}
961
962static PyObject*
963element_getchildren(ElementObject* self, PyObject* args)
964{
965 int i;
966 PyObject* list;
967
Serhiy Storchaka09b52472017-05-17 10:08:11 +0300968 if (PyErr_WarnPy3k("This method will be removed in future versions. "
969 "Use 'list(elem)' or iteration over elem instead.",
970 1) < 0) {
971 return NULL;
972 }
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000973
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000974 if (!PyArg_ParseTuple(args, ":getchildren"))
975 return NULL;
976
977 if (!self->extra)
978 return PyList_New(0);
979
980 list = PyList_New(self->extra->length);
981 if (!list)
982 return NULL;
983
984 for (i = 0; i < self->extra->length; i++) {
985 PyObject* item = self->extra->children[i];
986 Py_INCREF(item);
987 PyList_SET_ITEM(list, i, item);
988 }
989
990 return list;
991}
992
993static PyObject*
Serhiy Storchaka09b52472017-05-17 10:08:11 +0300994element_iter_impl(ElementObject* self, PyObject* tag)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000995{
Serhiy Storchaka09b52472017-05-17 10:08:11 +0300996 PyObject* args;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000997 PyObject* result;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000998
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000999 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001000 PyErr_SetString(
1001 PyExc_RuntimeError,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001002 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001003 );
1004 return NULL;
1005 }
1006
1007 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001008 if (!args)
1009 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001010
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001011 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1012 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1013
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001014 result = PyObject_CallObject(elementtree_iter_obj, args);
1015
1016 Py_DECREF(args);
1017
1018 return result;
1019}
1020
Serhiy Storchaka09b52472017-05-17 10:08:11 +03001021static PyObject*
1022element_iter(ElementObject* self, PyObject* args)
1023{
1024 PyObject* tag = Py_None;
1025 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
1026 return NULL;
1027
1028 return element_iter_impl(self, tag);
1029}
1030
1031static PyObject*
1032element_getiterator(ElementObject* self, PyObject* args)
1033{
1034 PyObject* tag = Py_None;
1035 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
1036 return NULL;
1037
1038 /* Change for a DeprecationWarning in 1.4 */
1039 if (Py_Py3kWarningFlag &&
1040 PyErr_WarnEx(PyExc_PendingDeprecationWarning,
1041 "This method will be removed in future versions. "
1042 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1043 1) < 0) {
1044 return NULL;
1045 }
1046 return element_iter_impl(self, tag);
1047}
1048
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001049
1050static PyObject*
1051element_itertext(ElementObject* self, PyObject* args)
1052{
1053 PyObject* result;
1054
1055 if (!PyArg_ParseTuple(args, ":itertext"))
1056 return NULL;
1057
1058 if (!elementtree_itertext_obj) {
1059 PyErr_SetString(
1060 PyExc_RuntimeError,
1061 "itertext helper not found"
1062 );
1063 return NULL;
1064 }
1065
1066 args = PyTuple_New(1);
1067 if (!args)
1068 return NULL;
1069
1070 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1071
1072 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001073
1074 Py_DECREF(args);
1075
1076 return result;
1077}
1078
1079static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001080element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001082 ElementObject* self = (ElementObject*) self_;
1083
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001084 if (!self->extra || index < 0 || index >= self->extra->length) {
1085 PyErr_SetString(
1086 PyExc_IndexError,
1087 "child index out of range"
1088 );
1089 return NULL;
1090 }
1091
1092 Py_INCREF(self->extra->children[index]);
1093 return self->extra->children[index];
1094}
1095
1096static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001097element_insert(ElementObject* self, PyObject* args)
1098{
1099 int i;
1100
1101 int index;
1102 PyObject* element;
1103 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1104 &Element_Type, &element))
1105 return NULL;
1106
1107 if (!self->extra)
1108 element_new_extra(self, NULL);
1109
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001110 if (index < 0) {
1111 index += self->extra->length;
1112 if (index < 0)
1113 index = 0;
1114 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001115 if (index > self->extra->length)
1116 index = self->extra->length;
1117
1118 if (element_resize(self, 1) < 0)
1119 return NULL;
1120
1121 for (i = self->extra->length; i > index; i--)
1122 self->extra->children[i] = self->extra->children[i-1];
1123
1124 Py_INCREF(element);
1125 self->extra->children[index] = element;
1126
1127 self->extra->length++;
1128
1129 Py_RETURN_NONE;
1130}
1131
1132static PyObject*
1133element_items(ElementObject* self, PyObject* args)
1134{
1135 if (!PyArg_ParseTuple(args, ":items"))
1136 return NULL;
1137
1138 if (!self->extra || self->extra->attrib == Py_None)
1139 return PyList_New(0);
1140
1141 return PyDict_Items(self->extra->attrib);
1142}
1143
1144static PyObject*
1145element_keys(ElementObject* self, PyObject* args)
1146{
1147 if (!PyArg_ParseTuple(args, ":keys"))
1148 return NULL;
1149
1150 if (!self->extra || self->extra->attrib == Py_None)
1151 return PyList_New(0);
1152
1153 return PyDict_Keys(self->extra->attrib);
1154}
1155
Martin v. Löwis18e16552006-02-15 17:27:45 +00001156static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001157element_length(ElementObject* self)
1158{
1159 if (!self->extra)
1160 return 0;
1161
1162 return self->extra->length;
1163}
1164
1165static PyObject*
1166element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1167{
1168 PyObject* elem;
1169
1170 PyObject* tag;
1171 PyObject* attrib;
1172 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1173 return NULL;
1174
1175 attrib = PyDict_Copy(attrib);
1176 if (!attrib)
1177 return NULL;
1178
1179 elem = element_new(tag, attrib);
1180
1181 Py_DECREF(attrib);
1182
1183 return elem;
1184}
1185
1186static PyObject*
1187element_reduce(ElementObject* self, PyObject* args)
1188{
1189 if (!PyArg_ParseTuple(args, ":__reduce__"))
1190 return NULL;
1191
1192 /* Hack alert: This method is used to work around a __copy__
1193 problem on certain 2.3 and 2.4 versions. To save time and
1194 simplify the code, we create the copy in here, and use a dummy
1195 copyelement helper to trick the copy module into doing the
1196 right thing. */
1197
1198 if (!elementtree_copyelement_obj) {
1199 PyErr_SetString(
1200 PyExc_RuntimeError,
1201 "copyelement helper not found"
1202 );
1203 return NULL;
1204 }
1205
1206 return Py_BuildValue(
1207 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1208 );
1209}
1210
1211static PyObject*
1212element_remove(ElementObject* self, PyObject* args)
1213{
1214 int i;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001215 int rc;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001216 PyObject* element;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001217 PyObject* found;
1218
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1220 return NULL;
1221
1222 if (!self->extra) {
1223 /* element has no children, so raise exception */
1224 PyErr_SetString(
1225 PyExc_ValueError,
1226 "list.remove(x): x not in list"
1227 );
1228 return NULL;
1229 }
1230
1231 for (i = 0; i < self->extra->length; i++) {
1232 if (self->extra->children[i] == element)
1233 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001234 rc = PyObject_Compare(self->extra->children[i], element);
1235 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001236 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001237 if (rc < 0 && PyErr_Occurred())
1238 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001239 }
1240
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001241 if (i >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001242 /* element is not in children, so raise exception */
1243 PyErr_SetString(
1244 PyExc_ValueError,
1245 "list.remove(x): x not in list"
1246 );
1247 return NULL;
1248 }
1249
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001250 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001251
1252 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001253 for (; i < self->extra->length; i++)
1254 self->extra->children[i] = self->extra->children[i+1];
1255
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001256 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001257 Py_RETURN_NONE;
1258}
1259
1260static PyObject*
1261element_repr(ElementObject* self)
1262{
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001263 int status;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001264
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001265 if (self->tag == NULL)
1266 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001267
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001268 status = Py_ReprEnter((PyObject *)self);
1269 if (status == 0) {
1270 PyObject *repr, *tag;
1271 tag = PyObject_Repr(self->tag);
Stéphane Wirtel41af9422017-06-12 15:30:48 +02001272 if (!tag) {
1273 Py_ReprLeave((PyObject *)self);
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001274 return NULL;
Stéphane Wirtel41af9422017-06-12 15:30:48 +02001275 }
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001276
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001277 repr = PyString_FromFormat("<Element %s at %p>",
1278 PyString_AS_STRING(tag), self);
Benjamin Petersond7324bc2016-12-03 11:30:04 -08001279 Py_ReprLeave((PyObject *)self);
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001280 Py_DECREF(tag);
1281 return repr;
1282 }
1283 if (status > 0)
1284 PyErr_Format(PyExc_RuntimeError,
1285 "reentrant call inside %s.__repr__",
1286 Py_TYPE(self)->tp_name);
1287 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001288}
1289
1290static PyObject*
1291element_set(ElementObject* self, PyObject* args)
1292{
1293 PyObject* attrib;
1294
1295 PyObject* key;
1296 PyObject* value;
1297 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1298 return NULL;
1299
1300 if (!self->extra)
1301 element_new_extra(self, NULL);
1302
1303 attrib = element_get_attrib(self);
1304 if (!attrib)
1305 return NULL;
1306
1307 if (PyDict_SetItem(attrib, key, value) < 0)
1308 return NULL;
1309
1310 Py_RETURN_NONE;
1311}
1312
1313static int
Serhiy Storchakab5b76c32015-11-26 11:21:47 +02001314element_setitem(PyObject* self_, Py_ssize_t index_, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001315{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001316 ElementObject* self = (ElementObject*) self_;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001317 int i, index;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001318 PyObject* old;
1319
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001320 if (!self->extra || index_ < 0 || index_ >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001321 PyErr_SetString(
1322 PyExc_IndexError,
1323 "child assignment index out of range");
1324 return -1;
1325 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001326 index = (int)index_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001327
1328 old = self->extra->children[index];
1329
1330 if (item) {
1331 Py_INCREF(item);
1332 self->extra->children[index] = item;
1333 } else {
1334 self->extra->length--;
1335 for (i = index; i < self->extra->length; i++)
1336 self->extra->children[i] = self->extra->children[i+1];
1337 }
1338
1339 Py_DECREF(old);
1340
1341 return 0;
1342}
1343
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001344static PyObject*
1345element_subscr(PyObject* self_, PyObject* item)
1346{
1347 ElementObject* self = (ElementObject*) self_;
1348
1349#if (PY_VERSION_HEX < 0x02050000)
1350 if (PyInt_Check(item) || PyLong_Check(item)) {
1351 long i = PyInt_AsLong(item);
1352#else
1353 if (PyIndex_Check(item)) {
1354 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1355#endif
1356
1357 if (i == -1 && PyErr_Occurred()) {
1358 return NULL;
1359 }
1360 if (i < 0 && self->extra)
1361 i += self->extra->length;
1362 return element_getitem(self_, i);
1363 }
1364 else if (PySlice_Check(item)) {
1365 Py_ssize_t start, stop, step, slicelen, cur, i;
1366 PyObject* list;
1367
1368 if (!self->extra)
1369 return PyList_New(0);
1370
Serhiy Storchaka5e793212017-04-15 20:11:12 +03001371 if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001372 return NULL;
1373 }
Serhiy Storchakae41390a2017-04-08 11:48:57 +03001374 slicelen = _PySlice_AdjustIndices(self->extra->length, &start, &stop,
1375 step);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001376
1377 if (slicelen <= 0)
1378 return PyList_New(0);
1379 else {
1380 list = PyList_New(slicelen);
1381 if (!list)
1382 return NULL;
1383
1384 for (cur = start, i = 0; i < slicelen;
1385 cur += step, i++) {
1386 PyObject* item = self->extra->children[cur];
1387 Py_INCREF(item);
1388 PyList_SET_ITEM(list, i, item);
1389 }
1390
1391 return list;
1392 }
1393 }
1394 else {
1395 PyErr_SetString(PyExc_TypeError,
1396 "element indices must be integers");
1397 return NULL;
1398 }
1399}
1400
1401static int
1402element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1403{
1404 ElementObject* self = (ElementObject*) self_;
1405
1406#if (PY_VERSION_HEX < 0x02050000)
1407 if (PyInt_Check(item) || PyLong_Check(item)) {
1408 long i = PyInt_AsLong(item);
1409#else
1410 if (PyIndex_Check(item)) {
1411 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1412#endif
1413
1414 if (i == -1 && PyErr_Occurred()) {
1415 return -1;
1416 }
1417 if (i < 0 && self->extra)
1418 i += self->extra->length;
1419 return element_setitem(self_, i, value);
1420 }
1421 else if (PySlice_Check(item)) {
1422 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1423
1424 PyObject* recycle = NULL;
1425 PyObject* seq = NULL;
1426
1427 if (!self->extra)
1428 element_new_extra(self, NULL);
1429
Serhiy Storchaka5e793212017-04-15 20:11:12 +03001430 if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001431 return -1;
1432 }
Serhiy Storchakae41390a2017-04-08 11:48:57 +03001433 slicelen = _PySlice_AdjustIndices(self->extra->length, &start, &stop,
1434 step);
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001435 assert(slicelen <= self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001436
1437 if (value == NULL)
1438 newlen = 0;
1439 else {
1440 seq = PySequence_Fast(value, "");
1441 if (!seq) {
1442 PyErr_Format(
1443 PyExc_TypeError,
1444 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1445 );
1446 return -1;
1447 }
1448 newlen = PySequence_Size(seq);
1449 }
1450
1451 if (step != 1 && newlen != slicelen)
1452 {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001453 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001454 PyErr_Format(PyExc_ValueError,
1455#if (PY_VERSION_HEX < 0x02050000)
1456 "attempt to assign sequence of size %d "
1457 "to extended slice of size %d",
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001458 (int)newlen, (int)slicelen
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001459#else
1460 "attempt to assign sequence of size %zd "
1461 "to extended slice of size %zd",
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001462 newlen, slicelen
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001463#endif
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001464 );
1465 return -1;
1466 }
1467
1468
1469 /* Resize before creating the recycle bin, to prevent refleaks. */
1470 if (newlen > slicelen) {
1471 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001472 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001473 return -1;
1474 }
1475 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001476 assert(newlen - slicelen <= INT_MAX - self->extra->length);
1477 assert(newlen - slicelen >= -self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001478
1479 if (slicelen > 0) {
1480 /* to avoid recursive calls to this method (via decref), move
1481 old items to the recycle bin here, and get rid of them when
1482 we're done modifying the element */
1483 recycle = PyList_New(slicelen);
1484 if (!recycle) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001485 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001486 return -1;
1487 }
1488 for (cur = start, i = 0; i < slicelen;
1489 cur += step, i++)
1490 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1491 }
1492
1493 if (newlen < slicelen) {
1494 /* delete slice */
1495 for (i = stop; i < self->extra->length; i++)
1496 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1497 } else if (newlen > slicelen) {
1498 /* insert slice */
1499 for (i = self->extra->length-1; i >= stop; i--)
1500 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1501 }
1502
1503 /* replace the slice */
1504 for (cur = start, i = 0; i < newlen;
1505 cur += step, i++) {
1506 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1507 Py_INCREF(element);
1508 self->extra->children[cur] = element;
1509 }
1510
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001511 self->extra->length += (int)(newlen - slicelen);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001512
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001513 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001514
1515 /* discard the recycle bin, and everything in it */
1516 Py_XDECREF(recycle);
1517
1518 return 0;
1519 }
1520 else {
1521 PyErr_SetString(PyExc_TypeError,
1522 "element indices must be integers");
1523 return -1;
1524 }
1525}
1526
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001527static PyMethodDef element_methods[] = {
1528
1529 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1530
1531 {"get", (PyCFunction) element_get, METH_VARARGS},
1532 {"set", (PyCFunction) element_set, METH_VARARGS},
1533
1534 {"find", (PyCFunction) element_find, METH_VARARGS},
1535 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1536 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1537
1538 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001539 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001540 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1541 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1542
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001543 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1544 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1545 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1546
Serhiy Storchaka09b52472017-05-17 10:08:11 +03001547 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001548 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1549
1550 {"items", (PyCFunction) element_items, METH_VARARGS},
1551 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1552
1553 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1554
1555 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1556 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1557
1558 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1559 C objects correctly, so we have to fake it using a __reduce__-
1560 based hack (see the element_reduce implementation above for
1561 details). */
1562
1563 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1564 using a runtime test to figure out if we need to fake things
1565 or now (see the init code below). The following entry is
1566 enabled only if the hack is needed. */
1567
1568 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1569
1570 {NULL, NULL}
1571};
1572
1573static PyObject*
1574element_getattr(ElementObject* self, char* name)
1575{
1576 PyObject* res;
1577
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001578 /* handle common attributes first */
1579 if (strcmp(name, "tag") == 0) {
1580 res = self->tag;
1581 Py_INCREF(res);
1582 return res;
1583 } else if (strcmp(name, "text") == 0) {
1584 res = element_get_text(self);
Xiang Zhang827c7832017-03-22 12:25:51 +08001585 Py_XINCREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001586 return res;
1587 }
1588
1589 /* methods */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001590 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1591 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001592 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001593
1594 PyErr_Clear();
1595
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001596 /* less common attributes */
1597 if (strcmp(name, "tail") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001598 res = element_get_tail(self);
1599 } else if (strcmp(name, "attrib") == 0) {
1600 if (!self->extra)
1601 element_new_extra(self, NULL);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001602 res = element_get_attrib(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001603 } else {
1604 PyErr_SetString(PyExc_AttributeError, name);
1605 return NULL;
1606 }
1607
1608 if (!res)
1609 return NULL;
1610
1611 Py_INCREF(res);
1612 return res;
1613}
1614
1615static int
1616element_setattr(ElementObject* self, const char* name, PyObject* value)
1617{
1618 if (value == NULL) {
1619 PyErr_SetString(
1620 PyExc_AttributeError,
1621 "can't delete element attributes"
1622 );
1623 return -1;
1624 }
1625
1626 if (strcmp(name, "tag") == 0) {
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001627 Py_INCREF(value);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001628 Py_SETREF(self->tag, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001629 } else if (strcmp(name, "text") == 0) {
Oren Milmanf15058a2017-10-11 16:29:12 +03001630 Py_INCREF(value);
1631 _set_joined_ptr(&self->text, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001632 } else if (strcmp(name, "tail") == 0) {
Oren Milmanf15058a2017-10-11 16:29:12 +03001633 Py_INCREF(value);
1634 _set_joined_ptr(&self->tail, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001635 } else if (strcmp(name, "attrib") == 0) {
1636 if (!self->extra)
1637 element_new_extra(self, NULL);
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001638 Py_INCREF(value);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001639 Py_SETREF(self->extra->attrib, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001640 } else {
1641 PyErr_SetString(PyExc_AttributeError, name);
1642 return -1;
1643 }
1644
1645 return 0;
1646}
1647
1648static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001649 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001650 0, /* sq_concat */
1651 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001652 element_getitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001653 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001654 element_setitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001655 0,
1656};
1657
1658static PyMappingMethods element_as_mapping = {
1659 (lenfunc) element_length,
1660 (binaryfunc) element_subscr,
1661 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001662};
1663
1664statichere PyTypeObject Element_Type = {
Benjamin Petersona72d15c2017-09-13 21:20:29 -07001665 PyVarObject_HEAD_INIT(NULL, 0)
1666 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001667 /* methods */
1668 (destructor)element_dealloc, /* tp_dealloc */
1669 0, /* tp_print */
1670 (getattrfunc)element_getattr, /* tp_getattr */
1671 (setattrfunc)element_setattr, /* tp_setattr */
1672 0, /* tp_compare */
1673 (reprfunc)element_repr, /* tp_repr */
1674 0, /* tp_as_number */
1675 &element_as_sequence, /* tp_as_sequence */
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001676 &element_as_mapping, /* tp_as_mapping */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001677};
1678
1679/* ==================================================================== */
1680/* the tree builder type */
1681
1682typedef struct {
1683 PyObject_HEAD
1684
1685 PyObject* root; /* root node (first created node) */
1686
1687 ElementObject* this; /* current node */
1688 ElementObject* last; /* most recently created node */
1689
1690 PyObject* data; /* data collector (string or list), or NULL */
1691
1692 PyObject* stack; /* element stack */
Neal Norwitzc7074382006-06-12 02:06:17 +00001693 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001694
1695 /* element tracing */
1696 PyObject* events; /* list of events, or NULL if not collecting */
1697 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1698 PyObject* end_event_obj;
1699 PyObject* start_ns_event_obj;
1700 PyObject* end_ns_event_obj;
1701
1702} TreeBuilderObject;
1703
1704staticforward PyTypeObject TreeBuilder_Type;
1705
Christian Heimese93237d2007-12-19 02:37:44 +00001706#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001707
1708/* -------------------------------------------------------------------- */
1709/* constructor and destructor */
1710
1711LOCAL(PyObject*)
1712treebuilder_new(void)
1713{
1714 TreeBuilderObject* self;
1715
1716 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1717 if (self == NULL)
1718 return NULL;
1719
1720 self->root = NULL;
1721
1722 Py_INCREF(Py_None);
1723 self->this = (ElementObject*) Py_None;
1724
1725 Py_INCREF(Py_None);
1726 self->last = (ElementObject*) Py_None;
1727
1728 self->data = NULL;
1729
1730 self->stack = PyList_New(20);
1731 self->index = 0;
1732
1733 self->events = NULL;
1734 self->start_event_obj = self->end_event_obj = NULL;
1735 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1736
1737 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1738
1739 return (PyObject*) self;
1740}
1741
1742static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00001743treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001744{
1745 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1746 return NULL;
1747
1748 return treebuilder_new();
1749}
1750
1751static void
1752treebuilder_dealloc(TreeBuilderObject* self)
1753{
1754 Py_XDECREF(self->end_ns_event_obj);
1755 Py_XDECREF(self->start_ns_event_obj);
1756 Py_XDECREF(self->end_event_obj);
1757 Py_XDECREF(self->start_event_obj);
1758 Py_XDECREF(self->events);
1759 Py_DECREF(self->stack);
1760 Py_XDECREF(self->data);
1761 Py_DECREF(self->last);
1762 Py_DECREF(self->this);
1763 Py_XDECREF(self->root);
1764
1765 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1766
1767 PyObject_Del(self);
1768}
1769
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +03001770/* -------------------------------------------------------------------- */
1771/* helpers for handling of arbitrary element-like objects */
1772
1773static void
1774treebuilder_set_element_text_or_tail(PyObject **data, PyObject **dest)
1775{
1776 PyObject *tmp = JOIN_OBJ(*dest);
1777 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
1778 *data = NULL;
1779 Py_DECREF(tmp);
1780}
1781
1782LOCAL(void)
1783treebuilder_flush_data(TreeBuilderObject* self)
1784{
1785 ElementObject *element = self->last;
1786
1787 if (self->data) {
1788 if (self->this == element) {
1789 treebuilder_set_element_text_or_tail(
1790 &self->data,
1791 &element->text);
1792 }
1793 else {
1794 treebuilder_set_element_text_or_tail(
1795 &self->data,
1796 &element->tail);
1797 }
1798 }
1799}
1800
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001801LOCAL(int)
1802treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
1803 PyObject *node)
1804{
1805 if (action != NULL) {
1806 PyObject *res = PyTuple_Pack(2, action, node);
1807 if (res == NULL)
1808 return -1;
1809 if (PyList_Append(self->events, res) < 0) {
1810 Py_DECREF(res);
1811 return -1;
1812 }
1813 Py_DECREF(res);
1814 }
1815 return 0;
1816}
1817
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001818/* -------------------------------------------------------------------- */
1819/* handlers */
1820
1821LOCAL(PyObject*)
1822treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1823 PyObject* standalone)
1824{
1825 Py_RETURN_NONE;
1826}
1827
1828LOCAL(PyObject*)
1829treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1830 PyObject* attrib)
1831{
1832 PyObject* node;
1833 PyObject* this;
1834
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +03001835 treebuilder_flush_data(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001836
1837 node = element_new(tag, attrib);
1838 if (!node)
1839 return NULL;
1840
1841 this = (PyObject*) self->this;
1842
1843 if (this != Py_None) {
1844 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001845 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001846 } else {
1847 if (self->root) {
1848 PyErr_SetString(
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001849 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001850 "multiple elements on top level"
1851 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001852 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001853 }
1854 Py_INCREF(node);
1855 self->root = node;
1856 }
1857
1858 if (self->index < PyList_GET_SIZE(self->stack)) {
1859 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001860 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001861 Py_INCREF(this);
1862 } else {
1863 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001864 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001865 }
1866 self->index++;
1867
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001868 Py_INCREF(node);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001869 Py_SETREF(self->this, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001870
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001871 Py_INCREF(node);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001872 Py_SETREF(self->last, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001873
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001874 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
1875 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001876
1877 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001878
1879 error:
1880 Py_DECREF(node);
1881 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001882}
1883
1884LOCAL(PyObject*)
1885treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1886{
1887 if (!self->data) {
Fredrik Lundhdc075b92006-08-16 16:47:07 +00001888 if (self->last == (ElementObject*) Py_None) {
1889 /* ignore calls to data before the first call to start */
1890 Py_RETURN_NONE;
1891 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001892 /* store the first item as is */
1893 Py_INCREF(data); self->data = data;
1894 } else {
1895 /* more than one item; use a list to collect items */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001896 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1897 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001898 /* expat often generates single character data sections; handle
1899 the most common case by resizing the existing string... */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001900 Py_ssize_t size = PyString_GET_SIZE(self->data);
1901 if (_PyString_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001902 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001903 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001904 } else if (PyList_CheckExact(self->data)) {
1905 if (PyList_Append(self->data, data) < 0)
1906 return NULL;
1907 } else {
1908 PyObject* list = PyList_New(2);
1909 if (!list)
1910 return NULL;
1911 PyList_SET_ITEM(list, 0, self->data);
1912 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1913 self->data = list;
1914 }
1915 }
1916
1917 Py_RETURN_NONE;
1918}
1919
1920LOCAL(PyObject*)
1921treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1922{
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001923 ElementObject *item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001924
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +03001925 treebuilder_flush_data(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001926
1927 if (self->index == 0) {
1928 PyErr_SetString(
1929 PyExc_IndexError,
1930 "pop from empty stack"
1931 );
1932 return NULL;
1933 }
1934
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001935 item = self->last;
1936 self->last = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001937 self->index--;
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001938 self->this = (ElementObject *) PyList_GET_ITEM(self->stack, self->index);
1939 Py_INCREF(self->this);
1940 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001941
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001942 if (treebuilder_append_event(self, self->end_event_obj, (PyObject*)self->last) < 0)
1943 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001944
1945 Py_INCREF(self->last);
1946 return (PyObject*) self->last;
1947}
1948
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001949/* -------------------------------------------------------------------- */
1950/* methods (in alphabetical order) */
1951
1952static PyObject*
1953treebuilder_data(TreeBuilderObject* self, PyObject* args)
1954{
1955 PyObject* data;
1956 if (!PyArg_ParseTuple(args, "O:data", &data))
1957 return NULL;
1958
1959 return treebuilder_handle_data(self, data);
1960}
1961
1962static PyObject*
1963treebuilder_end(TreeBuilderObject* self, PyObject* args)
1964{
1965 PyObject* tag;
1966 if (!PyArg_ParseTuple(args, "O:end", &tag))
1967 return NULL;
1968
1969 return treebuilder_handle_end(self, tag);
1970}
1971
1972LOCAL(PyObject*)
1973treebuilder_done(TreeBuilderObject* self)
1974{
1975 PyObject* res;
1976
1977 /* FIXME: check stack size? */
1978
1979 if (self->root)
1980 res = self->root;
1981 else
1982 res = Py_None;
1983
1984 Py_INCREF(res);
1985 return res;
1986}
1987
1988static PyObject*
1989treebuilder_close(TreeBuilderObject* self, PyObject* args)
1990{
1991 if (!PyArg_ParseTuple(args, ":close"))
1992 return NULL;
1993
1994 return treebuilder_done(self);
1995}
1996
1997static PyObject*
1998treebuilder_start(TreeBuilderObject* self, PyObject* args)
1999{
2000 PyObject* tag;
2001 PyObject* attrib = Py_None;
2002 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2003 return NULL;
2004
2005 return treebuilder_handle_start(self, tag, attrib);
2006}
2007
2008static PyObject*
2009treebuilder_xml(TreeBuilderObject* self, PyObject* args)
2010{
2011 PyObject* encoding;
2012 PyObject* standalone;
2013 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
2014 return NULL;
2015
2016 return treebuilder_handle_xml(self, encoding, standalone);
2017}
2018
2019static PyMethodDef treebuilder_methods[] = {
2020 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2021 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2022 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
2023 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
2024 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2025 {NULL, NULL}
2026};
2027
2028static PyObject*
2029treebuilder_getattr(TreeBuilderObject* self, char* name)
2030{
2031 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
2032}
2033
2034statichere PyTypeObject TreeBuilder_Type = {
Benjamin Petersona72d15c2017-09-13 21:20:29 -07002035 PyVarObject_HEAD_INIT(NULL, 0)
2036 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002037 /* methods */
2038 (destructor)treebuilder_dealloc, /* tp_dealloc */
2039 0, /* tp_print */
2040 (getattrfunc)treebuilder_getattr, /* tp_getattr */
2041};
2042
2043/* ==================================================================== */
2044/* the expat interface */
2045
2046#if defined(USE_EXPAT)
2047
2048#include "expat.h"
2049
2050#if defined(USE_PYEXPAT_CAPI)
2051#include "pyexpat.h"
2052static struct PyExpat_CAPI* expat_capi;
2053#define EXPAT(func) (expat_capi->func)
2054#else
2055#define EXPAT(func) (XML_##func)
2056#endif
2057
2058typedef struct {
2059 PyObject_HEAD
2060
2061 XML_Parser parser;
2062
2063 PyObject* target;
2064 PyObject* entity;
2065
2066 PyObject* names;
2067
2068 PyObject* handle_xml;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002069
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002070 PyObject* handle_start;
2071 PyObject* handle_data;
2072 PyObject* handle_end;
2073
2074 PyObject* handle_comment;
2075 PyObject* handle_pi;
2076
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002077 PyObject* handle_close;
2078
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002079} XMLParserObject;
2080
2081staticforward PyTypeObject XMLParser_Type;
2082
2083/* helpers */
2084
#ifdef Py_USING_UNICODE
LOCAL(int)
checkstring(const char* string, int size)
{
    /* scan the first `size` bytes of an 8-bit string; return 1 as soon
       as a byte with the high bit set is found (i.e. the data is not
       plain 7-bit ascii), otherwise 0 */

    const char* cursor;
    int left;

    for (cursor = string, left = size; left > 0; cursor++, left--) {
        if (*cursor & 0x80)
            return 1;
    }

    return 0;
}
#endif
2099
2100LOCAL(PyObject*)
2101makestring(const char* string, int size)
2102{
2103 /* convert a UTF-8 string to either a 7-bit ascii string or a
2104 Unicode string */
2105
2106#if defined(Py_USING_UNICODE)
2107 if (checkstring(string, size))
2108 return PyUnicode_DecodeUTF8(string, size, "strict");
2109#endif
2110
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002111 return PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002112}
2113
2114LOCAL(PyObject*)
2115makeuniversal(XMLParserObject* self, const char* string)
2116{
2117 /* convert a UTF-8 tag/attribute name from the expat parser
2118 to a universal name string */
2119
2120 int size = strlen(string);
2121 PyObject* key;
2122 PyObject* value;
2123
2124 /* look the 'raw' name up in the names dictionary */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002125 key = PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002126 if (!key)
2127 return NULL;
2128
2129 value = PyDict_GetItem(self->names, key);
2130
2131 if (value) {
2132 Py_INCREF(value);
2133 } else {
2134 /* new name. convert to universal name, and decode as
2135 necessary */
2136
2137 PyObject* tag;
2138 char* p;
2139 int i;
2140
2141 /* look for namespace separator */
2142 for (i = 0; i < size; i++)
2143 if (string[i] == '}')
2144 break;
2145 if (i != size) {
2146 /* convert to universal name */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002147 tag = PyString_FromStringAndSize(NULL, size+1);
2148 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002149 p[0] = '{';
2150 memcpy(p+1, string, size);
2151 size++;
2152 } else {
2153 /* plain name; use key as tag */
2154 Py_INCREF(key);
2155 tag = key;
2156 }
2157
2158 /* decode universal name */
2159#if defined(Py_USING_UNICODE)
2160 /* inline makestring, to avoid duplicating the source string if
Martin Panter6a8163a2016-04-15 02:14:19 +00002161 it's not a utf-8 string */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002162 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002163 if (checkstring(p, size)) {
2164 value = PyUnicode_DecodeUTF8(p, size, "strict");
2165 Py_DECREF(tag);
2166 if (!value) {
2167 Py_DECREF(key);
2168 return NULL;
2169 }
2170 } else
2171#endif
2172 value = tag; /* use tag as is */
2173
2174 /* add to names dictionary */
2175 if (PyDict_SetItem(self->names, key, value) < 0) {
2176 Py_DECREF(key);
2177 Py_DECREF(value);
2178 return NULL;
2179 }
2180 }
2181
2182 Py_DECREF(key);
2183 return value;
2184}
2185
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002186static void
2187expat_set_error(const char* message, int line, int column)
2188{
2189 PyObject *error;
2190 PyObject *position;
2191 char buffer[256];
2192
2193 sprintf(buffer, "%s: line %d, column %d", message, line, column);
2194
2195 error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
2196 if (!error)
2197 return;
2198
2199 /* add position attribute */
2200 position = Py_BuildValue("(ii)", line, column);
2201 if (!position) {
2202 Py_DECREF(error);
2203 return;
2204 }
2205 if (PyObject_SetAttrString(error, "position", position) == -1) {
2206 Py_DECREF(error);
2207 Py_DECREF(position);
2208 return;
2209 }
2210 Py_DECREF(position);
2211
2212 PyErr_SetObject(elementtree_parseerror_obj, error);
2213 Py_DECREF(error);
2214}
2215
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002216/* -------------------------------------------------------------------- */
2217/* handlers */
2218
2219static void
2220expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2221 int data_len)
2222{
2223 PyObject* key;
2224 PyObject* value;
2225 PyObject* res;
2226
2227 if (data_len < 2 || data_in[0] != '&')
2228 return;
2229
2230 key = makestring(data_in + 1, data_len - 2);
2231 if (!key)
2232 return;
2233
2234 value = PyDict_GetItem(self->entity, key);
2235
2236 if (value) {
2237 if (TreeBuilder_CheckExact(self->target))
2238 res = treebuilder_handle_data(
2239 (TreeBuilderObject*) self->target, value
2240 );
2241 else if (self->handle_data)
2242 res = PyObject_CallFunction(self->handle_data, "O", value);
2243 else
2244 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002245 Py_XDECREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002246 } else if (!PyErr_Occurred()) {
2247 /* Report the first error, not the last */
2248 char message[128];
2249 sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key));
2250 expat_set_error(
2251 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002252 EXPAT(GetErrorLineNumber)(self->parser),
2253 EXPAT(GetErrorColumnNumber)(self->parser)
2254 );
2255 }
2256
2257 Py_DECREF(key);
2258}
2259
2260static void
2261expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2262 const XML_Char **attrib_in)
2263{
2264 PyObject* res;
2265 PyObject* tag;
2266 PyObject* attrib;
2267 int ok;
2268
2269 /* tag name */
2270 tag = makeuniversal(self, tag_in);
2271 if (!tag)
2272 return; /* parser will look for errors */
2273
2274 /* attributes */
2275 if (attrib_in[0]) {
2276 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002277 if (!attrib) {
2278 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002279 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002280 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002281 while (attrib_in[0] && attrib_in[1]) {
2282 PyObject* key = makeuniversal(self, attrib_in[0]);
2283 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2284 if (!key || !value) {
2285 Py_XDECREF(value);
2286 Py_XDECREF(key);
2287 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002288 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002289 return;
2290 }
2291 ok = PyDict_SetItem(attrib, key, value);
2292 Py_DECREF(value);
2293 Py_DECREF(key);
2294 if (ok < 0) {
2295 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002296 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002297 return;
2298 }
2299 attrib_in += 2;
2300 }
2301 } else {
2302 Py_INCREF(Py_None);
2303 attrib = Py_None;
2304 }
2305
2306 if (TreeBuilder_CheckExact(self->target))
2307 /* shortcut */
2308 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2309 tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002310 else if (self->handle_start) {
2311 if (attrib == Py_None) {
2312 Py_DECREF(attrib);
2313 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002314 if (!attrib) {
2315 Py_DECREF(tag);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002316 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002317 }
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002318 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002319 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002320 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002321 res = NULL;
2322
2323 Py_DECREF(tag);
2324 Py_DECREF(attrib);
2325
2326 Py_XDECREF(res);
2327}
2328
2329static void
2330expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2331 int data_len)
2332{
2333 PyObject* data;
2334 PyObject* res;
2335
2336 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002337 if (!data)
2338 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002339
2340 if (TreeBuilder_CheckExact(self->target))
2341 /* shortcut */
2342 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2343 else if (self->handle_data)
2344 res = PyObject_CallFunction(self->handle_data, "O", data);
2345 else
2346 res = NULL;
2347
2348 Py_DECREF(data);
2349
2350 Py_XDECREF(res);
2351}
2352
2353static void
2354expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2355{
2356 PyObject* tag;
2357 PyObject* res = NULL;
2358
2359 if (TreeBuilder_CheckExact(self->target))
2360 /* shortcut */
2361 /* the standard tree builder doesn't look at the end tag */
2362 res = treebuilder_handle_end(
2363 (TreeBuilderObject*) self->target, Py_None
2364 );
2365 else if (self->handle_end) {
2366 tag = makeuniversal(self, tag_in);
2367 if (tag) {
2368 res = PyObject_CallFunction(self->handle_end, "O", tag);
2369 Py_DECREF(tag);
2370 }
2371 }
2372
2373 Py_XDECREF(res);
2374}
2375
2376static void
2377expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2378 const XML_Char *uri)
2379{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002380 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2381 PyObject *parcel;
2382 PyObject *sprefix = NULL;
2383 PyObject *suri = NULL;
2384
2385 if (PyErr_Occurred())
2386 return;
2387
2388 if (!target->events || !target->start_ns_event_obj)
2389 return;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002390
Eli Benderskyf933e082013-11-28 06:25:45 -08002391 if (uri)
Eli Bendersky71142c42013-11-28 06:37:25 -08002392 suri = makestring(uri, strlen(uri));
Eli Benderskyf933e082013-11-28 06:25:45 -08002393 else
Eli Bendersky71142c42013-11-28 06:37:25 -08002394 suri = PyString_FromStringAndSize("", 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002395 if (!suri)
2396 return;
2397
2398 if (prefix)
2399 sprefix = makestring(prefix, strlen(prefix));
2400 else
2401 sprefix = PyString_FromStringAndSize("", 0);
2402 if (!sprefix) {
2403 Py_DECREF(suri);
2404 return;
2405 }
2406
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002407 parcel = PyTuple_Pack(2, sprefix, suri);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002408 Py_DECREF(sprefix);
2409 Py_DECREF(suri);
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002410 if (!parcel)
2411 return;
2412 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
2413 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002414}
2415
2416static void
2417expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2418{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002419 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2420
2421 if (PyErr_Occurred())
2422 return;
2423
2424 if (!target->events)
2425 return;
2426
2427 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002428}
2429
2430static void
2431expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2432{
2433 PyObject* comment;
2434 PyObject* res;
2435
2436 if (self->handle_comment) {
2437 comment = makestring(comment_in, strlen(comment_in));
2438 if (comment) {
2439 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2440 Py_XDECREF(res);
2441 Py_DECREF(comment);
2442 }
2443 }
2444}
2445
2446static void
2447expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2448 const XML_Char* data_in)
2449{
2450 PyObject* target;
2451 PyObject* data;
2452 PyObject* res;
2453
2454 if (self->handle_pi) {
2455 target = makestring(target_in, strlen(target_in));
2456 data = makestring(data_in, strlen(data_in));
2457 if (target && data) {
2458 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2459 Py_XDECREF(res);
2460 Py_DECREF(data);
2461 Py_DECREF(target);
2462 } else {
2463 Py_XDECREF(data);
2464 Py_XDECREF(target);
2465 }
2466 }
2467}
2468
#ifdef Py_USING_UNICODE
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    /* expat callback for encodings it does not know itself.  builds a
       byte -> code point table by decoding all 256 byte values with the
       Python codec called `name`.  only encodings that decode those 256
       bytes to exactly 256 characters are accepted.  bytes the codec
       cannot decode are marked with -1 in the table.

       returns XML_STATUS_OK, or XML_STATUS_ERROR on failure. */

    unsigned char bytes[256];
    PyObject* decoded;
    Py_UNICODE* chars;
    int ch;

    memset(info, 0, sizeof(XML_Encoding));

    /* one of each byte value, in order */
    for (ch = 0; ch < 256; ch++)
        bytes[ch] = ch;

    decoded = PyUnicode_Decode((char*) bytes, 256, name, "replace");
    if (!decoded)
        return XML_STATUS_ERROR;

    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        PyErr_SetString(PyExc_ValueError,
                        "multi-byte encodings are not supported");
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (ch = 0; ch < 256; ch++) {
        Py_UNICODE code = chars[ch];
        /* "replace" substitutes the replacement character for bytes
           that cannot be decoded */
        if (code == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[ch] = -1;
        else
            info->map[ch] = code;
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2509
2510/* -------------------------------------------------------------------- */
2511/* constructor and destructor */
2512
2513static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00002514xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002515{
2516 XMLParserObject* self;
2517 /* FIXME: does this need to be static? */
2518 static XML_Memory_Handling_Suite memory_handler;
2519
2520 PyObject* target = NULL;
2521 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002522 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002523 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2524 &target, &encoding))
2525 return NULL;
2526
2527#if defined(USE_PYEXPAT_CAPI)
2528 if (!expat_capi) {
2529 PyErr_SetString(
2530 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2531 );
2532 return NULL;
2533 }
2534#endif
2535
2536 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2537 if (self == NULL)
2538 return NULL;
2539
2540 self->entity = PyDict_New();
2541 if (!self->entity) {
2542 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002543 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544 }
2545
2546 self->names = PyDict_New();
2547 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002548 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002549 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002550 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002551 }
2552
2553 memory_handler.malloc_fcn = PyObject_Malloc;
2554 memory_handler.realloc_fcn = PyObject_Realloc;
2555 memory_handler.free_fcn = PyObject_Free;
2556
2557 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2558 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002559 PyObject_Del(self->names);
2560 PyObject_Del(self->entity);
2561 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002562 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002563 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002564 }
2565
2566 /* setup target handlers */
2567 if (!target) {
2568 target = treebuilder_new();
2569 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002570 EXPAT(ParserFree)(self->parser);
2571 PyObject_Del(self->names);
2572 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002573 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002574 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002575 }
2576 } else
2577 Py_INCREF(target);
2578 self->target = target;
2579
2580 self->handle_xml = PyObject_GetAttrString(target, "xml");
2581 self->handle_start = PyObject_GetAttrString(target, "start");
2582 self->handle_data = PyObject_GetAttrString(target, "data");
2583 self->handle_end = PyObject_GetAttrString(target, "end");
2584 self->handle_comment = PyObject_GetAttrString(target, "comment");
2585 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002586 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002587
2588 PyErr_Clear();
2589
2590 /* configure parser */
2591 EXPAT(SetUserData)(self->parser, self);
2592 EXPAT(SetElementHandler)(
2593 self->parser,
2594 (XML_StartElementHandler) expat_start_handler,
2595 (XML_EndElementHandler) expat_end_handler
2596 );
2597 EXPAT(SetDefaultHandlerExpand)(
2598 self->parser,
2599 (XML_DefaultHandler) expat_default_handler
2600 );
2601 EXPAT(SetCharacterDataHandler)(
2602 self->parser,
2603 (XML_CharacterDataHandler) expat_data_handler
2604 );
2605 if (self->handle_comment)
2606 EXPAT(SetCommentHandler)(
2607 self->parser,
2608 (XML_CommentHandler) expat_comment_handler
2609 );
2610 if (self->handle_pi)
2611 EXPAT(SetProcessingInstructionHandler)(
2612 self->parser,
2613 (XML_ProcessingInstructionHandler) expat_pi_handler
2614 );
2615#if defined(Py_USING_UNICODE)
2616 EXPAT(SetUnknownEncodingHandler)(
2617 self->parser,
2618 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2619 );
2620#endif
2621
2622 ALLOC(sizeof(XMLParserObject), "create expatparser");
2623
2624 return (PyObject*) self;
2625}
2626
2627static void
2628xmlparser_dealloc(XMLParserObject* self)
2629{
2630 EXPAT(ParserFree)(self->parser);
2631
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002632 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002633 Py_XDECREF(self->handle_pi);
2634 Py_XDECREF(self->handle_comment);
2635 Py_XDECREF(self->handle_end);
2636 Py_XDECREF(self->handle_data);
2637 Py_XDECREF(self->handle_start);
2638 Py_XDECREF(self->handle_xml);
2639
2640 Py_DECREF(self->target);
2641 Py_DECREF(self->entity);
2642 Py_DECREF(self->names);
2643
2644 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2645
2646 PyObject_Del(self);
2647}
2648
2649/* -------------------------------------------------------------------- */
2650/* methods (in alphabetical order) */
2651
2652LOCAL(PyObject*)
2653expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2654{
2655 int ok;
2656
2657 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2658
2659 if (PyErr_Occurred())
2660 return NULL;
2661
2662 if (!ok) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002663 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002664 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2665 EXPAT(GetErrorLineNumber)(self->parser),
2666 EXPAT(GetErrorColumnNumber)(self->parser)
2667 );
2668 return NULL;
2669 }
2670
2671 Py_RETURN_NONE;
2672}
2673
2674static PyObject*
2675xmlparser_close(XMLParserObject* self, PyObject* args)
2676{
2677 /* end feeding data to parser */
2678
2679 PyObject* res;
2680 if (!PyArg_ParseTuple(args, ":close"))
2681 return NULL;
2682
2683 res = expat_parse(self, "", 0, 1);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002684 if (!res)
2685 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002686
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002687 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002688 Py_DECREF(res);
2689 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002690 } if (self->handle_close) {
2691 Py_DECREF(res);
2692 return PyObject_CallFunction(self->handle_close, "");
2693 } else
2694 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002695}
2696
2697static PyObject*
2698xmlparser_feed(XMLParserObject* self, PyObject* args)
2699{
2700 /* feed data to parser */
2701
2702 char* data;
2703 int data_len;
2704 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2705 return NULL;
2706
2707 return expat_parse(self, data, data_len, 0);
2708}
2709
2710static PyObject*
2711xmlparser_parse(XMLParserObject* self, PyObject* args)
2712{
2713 /* (internal) parse until end of input stream */
2714
2715 PyObject* reader;
2716 PyObject* buffer;
2717 PyObject* res;
2718
2719 PyObject* fileobj;
2720 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2721 return NULL;
2722
2723 reader = PyObject_GetAttrString(fileobj, "read");
2724 if (!reader)
2725 return NULL;
2726
2727 /* read from open file object */
2728 for (;;) {
2729
2730 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2731
2732 if (!buffer) {
2733 /* read failed (e.g. due to KeyboardInterrupt) */
2734 Py_DECREF(reader);
2735 return NULL;
2736 }
2737
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002738 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739 Py_DECREF(buffer);
2740 break;
2741 }
2742
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002743 if (PyString_GET_SIZE(buffer) > INT_MAX) {
2744 Py_DECREF(buffer);
2745 Py_DECREF(reader);
2746 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
2747 return NULL;
2748 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002749 res = expat_parse(
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002750 self, PyString_AS_STRING(buffer), (int)PyString_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002751 );
2752
2753 Py_DECREF(buffer);
2754
2755 if (!res) {
2756 Py_DECREF(reader);
2757 return NULL;
2758 }
2759 Py_DECREF(res);
2760
2761 }
2762
2763 Py_DECREF(reader);
2764
2765 res = expat_parse(self, "", 0, 1);
2766
2767 if (res && TreeBuilder_CheckExact(self->target)) {
2768 Py_DECREF(res);
2769 return treebuilder_done((TreeBuilderObject*) self->target);
2770 }
2771
2772 return res;
2773}
2774
2775static PyObject*
2776xmlparser_setevents(XMLParserObject* self, PyObject* args)
2777{
2778 /* activate element event reporting */
2779
Neal Norwitzc7074382006-06-12 02:06:17 +00002780 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002781 TreeBuilderObject* target;
2782
2783 PyObject* events; /* event collector */
2784 PyObject* event_set = Py_None;
2785 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2786 &event_set))
2787 return NULL;
2788
2789 if (!TreeBuilder_CheckExact(self->target)) {
2790 PyErr_SetString(
2791 PyExc_TypeError,
2792 "event handling only supported for cElementTree.Treebuilder "
2793 "targets"
2794 );
2795 return NULL;
2796 }
2797
2798 target = (TreeBuilderObject*) self->target;
2799
2800 Py_INCREF(events);
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002801 Py_XSETREF(target->events, events);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002802
2803 /* clear out existing events */
Serhiy Storchaka98a97222014-02-09 13:14:04 +02002804 Py_CLEAR(target->start_event_obj);
2805 Py_CLEAR(target->end_event_obj);
2806 Py_CLEAR(target->start_ns_event_obj);
2807 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002808
2809 if (event_set == Py_None) {
2810 /* default is "end" only */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002811 target->end_event_obj = PyString_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002812 Py_RETURN_NONE;
2813 }
2814
2815 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2816 goto error;
2817
2818 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2819 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2820 char* event;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002821 if (!PyString_Check(item))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002822 goto error;
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002823 Py_INCREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002824 event = PyString_AS_STRING(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002825 if (strcmp(event, "start") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002826 Py_XSETREF(target->start_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002827 } else if (strcmp(event, "end") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002828 Py_XSETREF(target->end_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002829 } else if (strcmp(event, "start-ns") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002830 Py_XSETREF(target->start_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002831 EXPAT(SetNamespaceDeclHandler)(
2832 self->parser,
2833 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2834 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2835 );
2836 } else if (strcmp(event, "end-ns") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002837 Py_XSETREF(target->end_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002838 EXPAT(SetNamespaceDeclHandler)(
2839 self->parser,
2840 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2841 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2842 );
2843 } else {
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002844 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002845 PyErr_Format(
2846 PyExc_ValueError,
2847 "unknown event '%s'", event
2848 );
2849 return NULL;
2850 }
2851 }
2852
2853 Py_RETURN_NONE;
2854
2855 error:
2856 PyErr_SetString(
2857 PyExc_TypeError,
2858 "invalid event tuple"
2859 );
2860 return NULL;
2861}
2862
2863static PyMethodDef xmlparser_methods[] = {
2864 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2865 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2866 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2867 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2868 {NULL, NULL}
2869};
2870
2871static PyObject*
2872xmlparser_getattr(XMLParserObject* self, char* name)
2873{
2874 PyObject* res;
2875
2876 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2877 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002878 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002879
2880 PyErr_Clear();
2881
2882 if (strcmp(name, "entity") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002883 res = self->entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002884 else if (strcmp(name, "target") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002885 res = self->target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002886 else if (strcmp(name, "version") == 0) {
2887 char buffer[100];
2888 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2889 XML_MINOR_VERSION, XML_MICRO_VERSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002890 return PyString_FromString(buffer);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002891 } else {
2892 PyErr_SetString(PyExc_AttributeError, name);
2893 return NULL;
2894 }
2895
2896 Py_INCREF(res);
2897 return res;
2898}
2899
2900statichere PyTypeObject XMLParser_Type = {
Benjamin Petersona72d15c2017-09-13 21:20:29 -07002901 PyVarObject_HEAD_INIT(NULL, 0)
2902 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002903 /* methods */
2904 (destructor)xmlparser_dealloc, /* tp_dealloc */
2905 0, /* tp_print */
2906 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2907};
2908
2909#endif
2910
2911/* ==================================================================== */
2912/* python module interface */
2913
2914static PyMethodDef _functions[] = {
2915 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2916 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2917 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2918#if defined(USE_EXPAT)
2919 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2920 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2921#endif
2922 {NULL, NULL}
2923};
2924
2925DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002926init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002927{
2928 PyObject* m;
2929 PyObject* g;
2930 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002931
2932 /* Patch object type */
Christian Heimese93237d2007-12-19 02:37:44 +00002933 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002934#if defined(USE_EXPAT)
Christian Heimese93237d2007-12-19 02:37:44 +00002935 Py_TYPE(&XMLParser_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002936#endif
2937
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002938 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002939 if (!m)
2940 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002941
2942 /* python glue code */
2943
2944 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002945 if (!g)
2946 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002947
2948 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2949
2950 bootstrap = (
2951
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002952 "from copy import copy, deepcopy\n"
2953
2954 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002955 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002956 "except ImportError:\n"
2957 " import ElementTree\n"
2958 "ET = ElementTree\n"
2959 "del ElementTree\n"
2960
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002961 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002962
2963 "try:\n" /* check if copy works as is */
2964 " copy(cElementTree.Element('x'))\n"
2965 "except:\n"
2966 " def copyelement(elem):\n"
2967 " return elem\n"
2968
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002969 "class CommentProxy:\n"
2970 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002971 " element = cElementTree.Element(ET.Comment)\n"
2972 " element.text = text\n"
2973 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002974 " def __cmp__(self, other):\n"
2975 " return cmp(ET.Comment, other)\n"
2976 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002977
2978 "class ElementTree(ET.ElementTree):\n" /* public */
2979 " def parse(self, source, parser=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002980 " close_source = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002981 " if not hasattr(source, 'read'):\n"
2982 " source = open(source, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002983 " close_source = False\n"
2984 " try:\n"
2985 " if parser is not None:\n"
2986 " while 1:\n"
2987 " data = source.read(65536)\n"
2988 " if not data:\n"
2989 " break\n"
2990 " parser.feed(data)\n"
2991 " self._root = parser.close()\n"
2992 " else:\n"
2993 " parser = cElementTree.XMLParser()\n"
2994 " self._root = parser._parse(source)\n"
2995 " return self._root\n"
2996 " finally:\n"
2997 " if close_source:\n"
2998 " source.close()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002999 "cElementTree.ElementTree = ElementTree\n"
3000
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003001 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003002 " if tag == '*':\n"
3003 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003004 " if tag is None or node.tag == tag:\n"
3005 " yield node\n"
3006 " for node in node:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003007 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008 " yield node\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003009
3010 "def itertext(node):\n" /* helper */
3011 " if node.text:\n"
3012 " yield node.text\n"
3013 " for e in node:\n"
3014 " for s in e.itertext():\n"
3015 " yield s\n"
3016 " if e.tail:\n"
3017 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003018
3019 "def parse(source, parser=None):\n" /* public */
3020 " tree = ElementTree()\n"
3021 " tree.parse(source, parser)\n"
3022 " return tree\n"
3023 "cElementTree.parse = parse\n"
3024
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003025 "class iterparse(object):\n"
3026 " root = None\n"
3027 " def __init__(self, file, events=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02003028 " self._close_file = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003029 " if not hasattr(file, 'read'):\n"
3030 " file = open(file, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02003031 " self._close_file = True\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003032 " self._file = file\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003033 " self._events = []\n"
3034 " self._index = 0\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003035 " self._error = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003036 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003037 " b = cElementTree.TreeBuilder()\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003038 " self._parser = cElementTree.XMLParser(b)\n"
3039 " self._parser._setevents(self._events, events)\n"
3040 " def next(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003041 " while 1:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003042 " try:\n"
3043 " item = self._events[self._index]\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003044 " self._index += 1\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003045 " return item\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003046 " except IndexError:\n"
3047 " pass\n"
3048 " if self._error:\n"
3049 " e = self._error\n"
3050 " self._error = None\n"
3051 " raise e\n"
3052 " if self._parser is None:\n"
3053 " self.root = self._root\n"
3054 " if self._close_file:\n"
3055 " self._file.close()\n"
3056 " raise StopIteration\n"
3057 " # load event buffer\n"
3058 " del self._events[:]\n"
3059 " self._index = 0\n"
3060 " data = self._file.read(16384)\n"
3061 " if data:\n"
3062 " try:\n"
3063 " self._parser.feed(data)\n"
3064 " except SyntaxError as exc:\n"
3065 " self._error = exc\n"
3066 " else:\n"
3067 " self._root = self._parser.close()\n"
3068 " self._parser = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003069 " def __iter__(self):\n"
3070 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003071 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003072
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003073 "class PIProxy:\n"
3074 " def __call__(self, target, text=None):\n"
3075 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076 " element.text = target\n"
3077 " if text:\n"
3078 " element.text = element.text + ' ' + text\n"
3079 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003080 " def __cmp__(self, other):\n"
3081 " return cmp(ET.PI, other)\n"
3082 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003083
3084 "def XML(text):\n" /* public */
3085 " parser = cElementTree.XMLParser()\n"
3086 " parser.feed(text)\n"
3087 " return parser.close()\n"
3088 "cElementTree.XML = cElementTree.fromstring = XML\n"
3089
3090 "def XMLID(text):\n" /* public */
3091 " tree = XML(text)\n"
3092 " ids = {}\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003093 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003094 " id = elem.get('id')\n"
3095 " if id:\n"
3096 " ids[id] = elem\n"
3097 " return tree, ids\n"
3098 "cElementTree.XMLID = XMLID\n"
3099
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003100 "try:\n"
3101 " register_namespace = ET.register_namespace\n"
3102 "except AttributeError:\n"
3103 " def register_namespace(prefix, uri):\n"
3104 " ET._namespace_map[uri] = prefix\n"
3105 "cElementTree.register_namespace = register_namespace\n"
3106
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003107 "cElementTree.dump = ET.dump\n"
3108 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3109 "cElementTree.iselement = ET.iselement\n"
3110 "cElementTree.QName = ET.QName\n"
3111 "cElementTree.tostring = ET.tostring\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003112 "cElementTree.fromstringlist = ET.fromstringlist\n"
3113 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003114 "cElementTree.VERSION = '" VERSION "'\n"
3115 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003116
3117 );
3118
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003119 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3120 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003121
3122 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3123
3124 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3125 if (elementtree_copyelement_obj) {
3126 /* reduce hack needed; enable reduce method */
3127 PyMethodDef* mp;
3128 for (mp = element_methods; mp->ml_name; mp++)
3129 if (mp->ml_meth == (PyCFunction) element_reduce) {
3130 mp->ml_name = "__reduce__";
3131 break;
3132 }
3133 } else
3134 PyErr_Clear();
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003135
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003136 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003137 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3138 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003139
3140#if defined(USE_PYEXPAT_CAPI)
3141 /* link against pyexpat, if possible */
Larry Hastings402b73f2010-03-25 00:54:54 +00003142 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003143 if (expat_capi) {
3144 /* check that it's usable */
3145 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3146 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3147 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3148 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3149 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3150 expat_capi = NULL;
3151 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003152#endif
3153
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003154 elementtree_parseerror_obj = PyErr_NewException(
3155 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3156 );
3157 Py_INCREF(elementtree_parseerror_obj);
3158 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003159}