blob: f7f992dd3a95573d58fad9f911f5725e19b8be02 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xicluna3e8c1892010-03-11 14:36:19 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xicluna3e8c1892010-03-11 14:36:19 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xicluna3e8c1892010-03-11 14:36:19 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xicluna3e8c1892010-03-11 14:36:19 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
Fredrik Lundhdc075b92006-08-16 16:47:07 +000052#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000053
54/* -------------------------------------------------------------------- */
55/* configuration */
56
57/* Leave defined to include the expat-based XMLParser type */
58#define USE_EXPAT
59
Florent Xicluna3e8c1892010-03-11 14:36:19 +000060/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* #define USE_PYEXPAT_CAPI */
62
63/* An element can hold this many children without extra memory
64 allocations. */
65#define STATIC_CHILDREN 4
66
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
73 eight bytes. For the current version of cElementTree, this means
74 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
/* Allocation tracing hooks. Flip the condition to 1 to keep a running
   byte count and print it on every ALLOC/RELEASE; in the default
   configuration both macros expand to nothing. */
#if 0
static int memory = 0;
#  define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#  define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#  define ALLOC(size, comment)
#  define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) declares a file-private helper; under
   MSVC it additionally requests inlining and the fastcall convention */
#if defined(_MSC_VER)
#  define LOCAL(type) static __inline type __fastcall
#else
#  define LOCAL(type) static type
#endif
96
97/* compatibility macros */
/* Python < 2.6: provide the object-header accessor macros */
#if (PY_VERSION_HEX < 0x02060000)
#  define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#  define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Python < 2.5: no Py_ssize_t / lenfunc yet */
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#  define lenfunc inquiry
#endif

/* Python < 2.4: fall back to the non-exact dict check, and supply
   Py_RETURN_NONE unless something else already defined it */
#if (PY_VERSION_HEX < 0x02040000)
#  define PyDict_CheckExact PyDict_Check

#  if !defined(Py_RETURN_NONE)
#    define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#  endif
#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000115
/* The lowest bit of the text/tail pointers doubles as a 'join' flag
   (see the comments in the ElementObject definition). Never use such
   a pointer as an object without passing it through JOIN_OBJ first.

   JOIN_OBJ(p)       - the object pointer with the flag bit cleared
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - the object pointer of p, or'ed with flag */
#define JOIN_OBJ(p) ((PyObject*) (((Py_uintptr_t) (p)) & ~(Py_uintptr_t) 1))
#define JOIN_GET(p) (((Py_uintptr_t) (p)) & 1)
#define JOIN_SET(p, flag) ((void*) (((Py_uintptr_t) (JOIN_OBJ(p))) | (flag)))
123
124/* glue functions (see the init function for details) */
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000125static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000128static PyObject* elementtree_iter_obj;
129static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130static PyObject* elementpath_obj;
131
132/* helpers */
133
Oren Milmanf15058a2017-10-11 16:29:12 +0300134/* Py_SETREF for a PyObject* that uses a join flag. */
135Py_LOCAL_INLINE(void)
136_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
137{
138 PyObject *tmp = JOIN_OBJ(*p);
139 *p = new_joined_ptr;
140 Py_DECREF(tmp);
141}
142
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143LOCAL(PyObject*)
144deepcopy(PyObject* object, PyObject* memo)
145{
146 /* do a deep copy of the given object */
147
148 PyObject* args;
149 PyObject* result;
150
151 if (!elementtree_deepcopy_obj) {
152 PyErr_SetString(
153 PyExc_RuntimeError,
154 "deepcopy helper not found"
155 );
156 return NULL;
157 }
158
159 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000160 if (!args)
161 return NULL;
162
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000163 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
164 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
165
166 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
167
168 Py_DECREF(args);
169
170 return result;
171}
172
173LOCAL(PyObject*)
174list_join(PyObject* list)
175{
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300176 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000177 PyObject* joiner;
178 PyObject* function;
179 PyObject* args;
180 PyObject* result;
181
182 switch (PyList_GET_SIZE(list)) {
183 case 0:
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000184 return PyString_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000185 case 1:
186 result = PyList_GET_ITEM(list, 0);
187 Py_INCREF(result);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000188 return result;
189 }
190
191 /* two or more elements: slice out a suitable separator from the
192 first member, and use that to join the entire list */
193
194 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
195 if (!joiner)
196 return NULL;
197
198 function = PyObject_GetAttrString(joiner, "join");
199 if (!function) {
200 Py_DECREF(joiner);
201 return NULL;
202 }
203
204 args = PyTuple_New(1);
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300205 if (!args) {
206 Py_DECREF(function);
207 Py_DECREF(joiner);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000208 return NULL;
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300209 }
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000210
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300211 Py_INCREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212 PyTuple_SET_ITEM(args, 0, list);
213
214 result = PyObject_CallObject(function, args);
215
216 Py_DECREF(args); /* also removes list */
217 Py_DECREF(function);
218 Py_DECREF(joiner);
219
220 return result;
221}
222
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000223/* -------------------------------------------------------------------- */
224/* the element type */
225
226typedef struct {
227
228 /* attributes (a dictionary object), or None if no attributes */
229 PyObject* attrib;
230
231 /* child elements */
232 int length; /* actual number of items */
233 int allocated; /* allocated items */
234
235 /* this either points to _children or to a malloced buffer */
236 PyObject* *children;
237
238 PyObject* _children[STATIC_CHILDREN];
239
240} ElementObjectExtra;
241
242typedef struct {
243 PyObject_HEAD
244
245 /* element tag (a string). */
246 PyObject* tag;
247
248 /* text before first child. note that this is a tagged pointer;
249 use JOIN_OBJ to get the object pointer. the join flag is used
250 to distinguish lists created by the tree builder from lists
251 assigned to the attribute by application code; the former
252 should be joined before being returned to the user, the latter
253 should be left intact. */
254 PyObject* text;
255
256 /* text after this element, in parent. note that this is a tagged
257 pointer; use JOIN_OBJ to get the object pointer. */
258 PyObject* tail;
259
260 ElementObjectExtra* extra;
261
262} ElementObject;
263
264staticforward PyTypeObject Element_Type;
265
Christian Heimese93237d2007-12-19 02:37:44 +0000266#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000267
268/* -------------------------------------------------------------------- */
269/* element constructor and destructor */
270
271LOCAL(int)
272element_new_extra(ElementObject* self, PyObject* attrib)
273{
274 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
275 if (!self->extra)
276 return -1;
277
278 if (!attrib)
279 attrib = Py_None;
280
281 Py_INCREF(attrib);
282 self->extra->attrib = attrib;
283
284 self->extra->length = 0;
285 self->extra->allocated = STATIC_CHILDREN;
286 self->extra->children = self->extra->_children;
287
288 return 0;
289}
290
291LOCAL(void)
292element_dealloc_extra(ElementObject* self)
293{
294 int i;
295
296 Py_DECREF(self->extra->attrib);
297
298 for (i = 0; i < self->extra->length; i++)
299 Py_DECREF(self->extra->children[i]);
300
301 if (self->extra->children != self->extra->_children)
302 PyObject_Free(self->extra->children);
303
304 PyObject_Free(self->extra);
305}
306
307LOCAL(PyObject*)
308element_new(PyObject* tag, PyObject* attrib)
309{
310 ElementObject* self;
311
312 self = PyObject_New(ElementObject, &Element_Type);
313 if (self == NULL)
314 return NULL;
315
316 /* use None for empty dictionaries */
317 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
318 attrib = Py_None;
319
320 self->extra = NULL;
321
322 if (attrib != Py_None) {
323
Neal Norwitzc6a989a2006-05-10 06:57:58 +0000324 if (element_new_extra(self, attrib) < 0) {
325 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000326 return NULL;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000327 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000328
329 self->extra->length = 0;
330 self->extra->allocated = STATIC_CHILDREN;
331 self->extra->children = self->extra->_children;
332
333 }
334
335 Py_INCREF(tag);
336 self->tag = tag;
337
338 Py_INCREF(Py_None);
339 self->text = Py_None;
340
341 Py_INCREF(Py_None);
342 self->tail = Py_None;
343
344 ALLOC(sizeof(ElementObject), "create element");
345
346 return (PyObject*) self;
347}
348
349LOCAL(int)
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200350element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000351{
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200352 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000353 PyObject* *children;
354
355 /* make sure self->children can hold the given number of extra
356 elements. set an exception and return -1 if allocation failed */
357
358 if (!self->extra)
359 element_new_extra(self, NULL);
360
361 size = self->extra->length + extra;
362
363 if (size > self->extra->allocated) {
364 /* use Python 2.4's list growth strategy */
365 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000366 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
367 * which needs at least 4 bytes.
368 * Although it's a false alarm always assume at least one child to
369 * be safe.
370 */
371 size = size ? size : 1;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200372 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
373 goto nomemory;
374 if (size > INT_MAX) {
375 PyErr_SetString(PyExc_OverflowError,
376 "too many children");
377 return -1;
378 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000379 if (self->extra->children != self->extra->_children) {
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000380 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
381 * "children", which needs at least 4 bytes. Although it's a
382 * false alarm always assume at least one child to be safe.
383 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000384 children = PyObject_Realloc(self->extra->children,
385 size * sizeof(PyObject*));
386 if (!children)
387 goto nomemory;
388 } else {
389 children = PyObject_Malloc(size * sizeof(PyObject*));
390 if (!children)
391 goto nomemory;
392 /* copy existing children from static area to malloc buffer */
393 memcpy(children, self->extra->children,
394 self->extra->length * sizeof(PyObject*));
395 }
396 self->extra->children = children;
397 self->extra->allocated = size;
398 }
399
400 return 0;
401
402 nomemory:
403 PyErr_NoMemory();
404 return -1;
405}
406
407LOCAL(int)
408element_add_subelement(ElementObject* self, PyObject* element)
409{
410 /* add a child element to a parent */
411
412 if (element_resize(self, 1) < 0)
413 return -1;
414
415 Py_INCREF(element);
416 self->extra->children[self->extra->length] = element;
417
418 self->extra->length++;
419
420 return 0;
421}
422
423LOCAL(PyObject*)
424element_get_attrib(ElementObject* self)
425{
426 /* return borrowed reference to attrib dictionary */
427 /* note: this function assumes that the extra section exists */
428
429 PyObject* res = self->extra->attrib;
430
431 if (res == Py_None) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000432 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000433 /* create missing dictionary */
434 res = PyDict_New();
435 if (!res)
436 return NULL;
437 self->extra->attrib = res;
438 }
439
440 return res;
441}
442
443LOCAL(PyObject*)
444element_get_text(ElementObject* self)
445{
446 /* return borrowed reference to text attribute */
447
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300448 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000449
450 if (JOIN_GET(res)) {
451 res = JOIN_OBJ(res);
452 if (PyList_CheckExact(res)) {
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300453 PyObject *tmp = list_join(res);
454 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000455 return NULL;
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300456 self->text = tmp;
457 Py_DECREF(res);
458 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000459 }
460 }
461
462 return res;
463}
464
465LOCAL(PyObject*)
466element_get_tail(ElementObject* self)
467{
468 /* return borrowed reference to text attribute */
469
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300470 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000471
472 if (JOIN_GET(res)) {
473 res = JOIN_OBJ(res);
474 if (PyList_CheckExact(res)) {
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300475 PyObject *tmp = list_join(res);
476 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000477 return NULL;
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +0300478 self->tail = tmp;
479 Py_DECREF(res);
480 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000481 }
482 }
483
484 return res;
485}
486
487static PyObject*
488element(PyObject* self, PyObject* args, PyObject* kw)
489{
490 PyObject* elem;
491
492 PyObject* tag;
493 PyObject* attrib = NULL;
494 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
495 &PyDict_Type, &attrib))
496 return NULL;
497
498 if (attrib || kw) {
499 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
500 if (!attrib)
501 return NULL;
502 if (kw)
503 PyDict_Update(attrib, kw);
504 } else {
505 Py_INCREF(Py_None);
506 attrib = Py_None;
507 }
508
509 elem = element_new(tag, attrib);
510
511 Py_DECREF(attrib);
512
513 return elem;
514}
515
516static PyObject*
517subelement(PyObject* self, PyObject* args, PyObject* kw)
518{
519 PyObject* elem;
520
521 ElementObject* parent;
522 PyObject* tag;
523 PyObject* attrib = NULL;
524 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
525 &Element_Type, &parent, &tag,
526 &PyDict_Type, &attrib))
527 return NULL;
528
529 if (attrib || kw) {
530 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
531 if (!attrib)
532 return NULL;
533 if (kw)
534 PyDict_Update(attrib, kw);
535 } else {
536 Py_INCREF(Py_None);
537 attrib = Py_None;
538 }
539
540 elem = element_new(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000541 Py_DECREF(attrib);
Xiang Zhang9c0408d2017-03-22 14:32:52 +0800542 if (elem == NULL)
543 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000544
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000545 if (element_add_subelement(parent, elem) < 0) {
546 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000547 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000548 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000549
550 return elem;
551}
552
553static void
554element_dealloc(ElementObject* self)
555{
Serhiy Storchaka14518742016-12-28 09:23:17 +0200556 if (self->extra)
557 element_dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000558
559 /* discard attributes */
560 Py_DECREF(self->tag);
Serhiy Storchaka14518742016-12-28 09:23:17 +0200561 Py_DECREF(JOIN_OBJ(self->text));
562 Py_DECREF(JOIN_OBJ(self->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563
564 RELEASE(sizeof(ElementObject), "destroy element");
565
566 PyObject_Del(self);
567}
568
569/* -------------------------------------------------------------------- */
570/* methods (in alphabetical order) */
571
572static PyObject*
573element_append(ElementObject* self, PyObject* args)
574{
575 PyObject* element;
576 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
577 return NULL;
578
579 if (element_add_subelement(self, element) < 0)
580 return NULL;
581
582 Py_RETURN_NONE;
583}
584
585static PyObject*
586element_clear(ElementObject* self, PyObject* args)
587{
588 if (!PyArg_ParseTuple(args, ":clear"))
589 return NULL;
590
591 if (self->extra) {
592 element_dealloc_extra(self);
593 self->extra = NULL;
594 }
595
596 Py_INCREF(Py_None);
Oren Milmanf15058a2017-10-11 16:29:12 +0300597 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000598
599 Py_INCREF(Py_None);
Oren Milmanf15058a2017-10-11 16:29:12 +0300600 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000601
602 Py_RETURN_NONE;
603}
604
605static PyObject*
606element_copy(ElementObject* self, PyObject* args)
607{
608 int i;
609 ElementObject* element;
610
611 if (!PyArg_ParseTuple(args, ":__copy__"))
612 return NULL;
613
614 element = (ElementObject*) element_new(
615 self->tag, (self->extra) ? self->extra->attrib : Py_None
616 );
617 if (!element)
618 return NULL;
619
Oren Milmanf15058a2017-10-11 16:29:12 +0300620 Py_INCREF(JOIN_OBJ(self->text));
621 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000622
Oren Milmanf15058a2017-10-11 16:29:12 +0300623 Py_INCREF(JOIN_OBJ(self->tail));
624 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625
626 if (self->extra) {
627
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000628 if (element_resize(element, self->extra->length) < 0) {
629 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000630 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000631 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000632
633 for (i = 0; i < self->extra->length; i++) {
634 Py_INCREF(self->extra->children[i]);
635 element->extra->children[i] = self->extra->children[i];
636 }
637
638 element->extra->length = self->extra->length;
639
640 }
641
642 return (PyObject*) element;
643}
644
645static PyObject*
646element_deepcopy(ElementObject* self, PyObject* args)
647{
648 int i;
649 ElementObject* element;
650 PyObject* tag;
651 PyObject* attrib;
652 PyObject* text;
653 PyObject* tail;
654 PyObject* id;
655
656 PyObject* memo;
657 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
658 return NULL;
659
660 tag = deepcopy(self->tag, memo);
661 if (!tag)
662 return NULL;
663
664 if (self->extra) {
665 attrib = deepcopy(self->extra->attrib, memo);
666 if (!attrib) {
667 Py_DECREF(tag);
668 return NULL;
669 }
670 } else {
671 Py_INCREF(Py_None);
672 attrib = Py_None;
673 }
674
675 element = (ElementObject*) element_new(tag, attrib);
676
677 Py_DECREF(tag);
678 Py_DECREF(attrib);
679
680 if (!element)
681 return NULL;
682
683 text = deepcopy(JOIN_OBJ(self->text), memo);
684 if (!text)
685 goto error;
Oren Milmanf15058a2017-10-11 16:29:12 +0300686 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000687
688 tail = deepcopy(JOIN_OBJ(self->tail), memo);
689 if (!tail)
690 goto error;
Oren Milmanf15058a2017-10-11 16:29:12 +0300691 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000692
693 if (self->extra) {
694
695 if (element_resize(element, self->extra->length) < 0)
696 goto error;
697
698 for (i = 0; i < self->extra->length; i++) {
699 PyObject* child = deepcopy(self->extra->children[i], memo);
700 if (!child) {
701 element->extra->length = i;
702 goto error;
703 }
704 element->extra->children[i] = child;
705 }
706
707 element->extra->length = self->extra->length;
708
709 }
710
711 /* add object to memo dictionary (so deepcopy won't visit it again) */
712 id = PyInt_FromLong((Py_uintptr_t) self);
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000713 if (!id)
714 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000715
716 i = PyDict_SetItem(memo, id, (PyObject*) element);
717
718 Py_DECREF(id);
719
720 if (i < 0)
721 goto error;
722
723 return (PyObject*) element;
724
725 error:
726 Py_DECREF(element);
727 return NULL;
728}
729
730LOCAL(int)
731checkpath(PyObject* tag)
732{
Neal Norwitzc7074382006-06-12 02:06:17 +0000733 Py_ssize_t i;
734 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000735
736 /* check if a tag contains an xpath character */
737
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000738#define PATHCHAR(ch) \
739 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000740
741#if defined(Py_USING_UNICODE)
742 if (PyUnicode_Check(tag)) {
743 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
744 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
745 if (p[i] == '{')
746 check = 0;
747 else if (p[i] == '}')
748 check = 1;
749 else if (check && PATHCHAR(p[i]))
750 return 1;
751 }
752 return 0;
753 }
754#endif
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000755 if (PyString_Check(tag)) {
756 char *p = PyString_AS_STRING(tag);
757 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000758 if (p[i] == '{')
759 check = 0;
760 else if (p[i] == '}')
761 check = 1;
762 else if (check && PATHCHAR(p[i]))
763 return 1;
764 }
765 return 0;
766 }
767
768 return 1; /* unknown type; might be path expression */
769}
770
771static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000772element_extend(ElementObject* self, PyObject* args)
773{
774 PyObject* seq;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300775 Py_ssize_t i;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000776
777 PyObject* seq_in;
778 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
779 return NULL;
780
781 seq = PySequence_Fast(seq_in, "");
782 if (!seq) {
783 PyErr_Format(
784 PyExc_TypeError,
785 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
786 );
787 return NULL;
788 }
789
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300790 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000791 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
792 if (element_add_subelement(self, element) < 0) {
793 Py_DECREF(seq);
794 return NULL;
795 }
796 }
797
798 Py_DECREF(seq);
799
800 Py_RETURN_NONE;
801}
802
803static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000804element_find(ElementObject* self, PyObject* args)
805{
806 int i;
807
808 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000809 PyObject* namespaces = Py_None;
810 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000811 return NULL;
812
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000813 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000814 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000815 elementpath_obj, "find", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000816 );
817
818 if (!self->extra)
819 Py_RETURN_NONE;
820
821 for (i = 0; i < self->extra->length; i++) {
822 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300823 int rc;
824 if (!Element_CheckExact(item))
825 continue;
826 Py_INCREF(item);
827 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
828 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000829 return item;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300830 Py_DECREF(item);
831 if (rc < 0 && PyErr_Occurred())
832 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000833 }
834
835 Py_RETURN_NONE;
836}
837
838static PyObject*
839element_findtext(ElementObject* self, PyObject* args)
840{
841 int i;
842
843 PyObject* tag;
844 PyObject* default_value = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000845 PyObject* namespaces = Py_None;
846 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000847 return NULL;
848
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000849 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000850 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000851 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000852 );
853
854 if (!self->extra) {
855 Py_INCREF(default_value);
856 return default_value;
857 }
858
859 for (i = 0; i < self->extra->length; i++) {
860 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300861 int rc;
862 if (!Element_CheckExact(item))
863 continue;
864 Py_INCREF(item);
865 rc = PyObject_Compare(item->tag, tag);
866 if (rc == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000867 PyObject* text = element_get_text(item);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300868 if (text == Py_None) {
869 Py_DECREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000870 return PyString_FromString("");
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300871 }
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000872 Py_XINCREF(text);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300873 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000874 return text;
875 }
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300876 Py_DECREF(item);
877 if (rc < 0 && PyErr_Occurred())
878 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000879 }
880
881 Py_INCREF(default_value);
882 return default_value;
883}
884
885static PyObject*
886element_findall(ElementObject* self, PyObject* args)
887{
888 int i;
889 PyObject* out;
890
891 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000892 PyObject* namespaces = Py_None;
893 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000894 return NULL;
895
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000896 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000897 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000898 elementpath_obj, "findall", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000899 );
900
901 out = PyList_New(0);
902 if (!out)
903 return NULL;
904
905 if (!self->extra)
906 return out;
907
908 for (i = 0; i < self->extra->length; i++) {
909 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300910 int rc;
911 if (!Element_CheckExact(item))
912 continue;
913 Py_INCREF(item);
914 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
915 if (rc == 0)
916 rc = PyList_Append(out, item);
917 Py_DECREF(item);
918 if (rc < 0 && PyErr_Occurred()) {
919 Py_DECREF(out);
920 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000921 }
922 }
923
924 return out;
925}
926
927static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000928element_iterfind(ElementObject* self, PyObject* args)
929{
930 PyObject* tag;
931 PyObject* namespaces = Py_None;
932 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
933 return NULL;
934
935 return PyObject_CallMethod(
936 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
937 );
938}
939
940static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000941element_get(ElementObject* self, PyObject* args)
942{
943 PyObject* value;
944
945 PyObject* key;
946 PyObject* default_value = Py_None;
947 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
948 return NULL;
949
950 if (!self->extra || self->extra->attrib == Py_None)
951 value = default_value;
952 else {
953 value = PyDict_GetItem(self->extra->attrib, key);
954 if (!value)
955 value = default_value;
956 }
957
958 Py_INCREF(value);
959 return value;
960}
961
962static PyObject*
963element_getchildren(ElementObject* self, PyObject* args)
964{
965 int i;
966 PyObject* list;
967
Serhiy Storchaka09b52472017-05-17 10:08:11 +0300968 if (PyErr_WarnPy3k("This method will be removed in future versions. "
969 "Use 'list(elem)' or iteration over elem instead.",
970 1) < 0) {
971 return NULL;
972 }
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000973
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000974 if (!PyArg_ParseTuple(args, ":getchildren"))
975 return NULL;
976
977 if (!self->extra)
978 return PyList_New(0);
979
980 list = PyList_New(self->extra->length);
981 if (!list)
982 return NULL;
983
984 for (i = 0; i < self->extra->length; i++) {
985 PyObject* item = self->extra->children[i];
986 Py_INCREF(item);
987 PyList_SET_ITEM(list, i, item);
988 }
989
990 return list;
991}
992
993static PyObject*
Serhiy Storchaka09b52472017-05-17 10:08:11 +0300994element_iter_impl(ElementObject* self, PyObject* tag)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000995{
Serhiy Storchaka09b52472017-05-17 10:08:11 +0300996 PyObject* args;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000997 PyObject* result;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000998
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000999 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001000 PyErr_SetString(
1001 PyExc_RuntimeError,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001002 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001003 );
1004 return NULL;
1005 }
1006
1007 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001008 if (!args)
1009 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001010
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001011 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1012 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1013
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001014 result = PyObject_CallObject(elementtree_iter_obj, args);
1015
1016 Py_DECREF(args);
1017
1018 return result;
1019}
1020
Serhiy Storchaka09b52472017-05-17 10:08:11 +03001021static PyObject*
1022element_iter(ElementObject* self, PyObject* args)
1023{
1024 PyObject* tag = Py_None;
1025 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
1026 return NULL;
1027
1028 return element_iter_impl(self, tag);
1029}
1030
1031static PyObject*
1032element_getiterator(ElementObject* self, PyObject* args)
1033{
1034 PyObject* tag = Py_None;
1035 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
1036 return NULL;
1037
1038 /* Change for a DeprecationWarning in 1.4 */
1039 if (Py_Py3kWarningFlag &&
1040 PyErr_WarnEx(PyExc_PendingDeprecationWarning,
1041 "This method will be removed in future versions. "
1042 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1043 1) < 0) {
1044 return NULL;
1045 }
1046 return element_iter_impl(self, tag);
1047}
1048
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001049
1050static PyObject*
1051element_itertext(ElementObject* self, PyObject* args)
1052{
1053 PyObject* result;
1054
1055 if (!PyArg_ParseTuple(args, ":itertext"))
1056 return NULL;
1057
1058 if (!elementtree_itertext_obj) {
1059 PyErr_SetString(
1060 PyExc_RuntimeError,
1061 "itertext helper not found"
1062 );
1063 return NULL;
1064 }
1065
1066 args = PyTuple_New(1);
1067 if (!args)
1068 return NULL;
1069
1070 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1071
1072 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001073
1074 Py_DECREF(args);
1075
1076 return result;
1077}
1078
1079static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001080element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001082 ElementObject* self = (ElementObject*) self_;
1083
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001084 if (!self->extra || index < 0 || index >= self->extra->length) {
1085 PyErr_SetString(
1086 PyExc_IndexError,
1087 "child index out of range"
1088 );
1089 return NULL;
1090 }
1091
1092 Py_INCREF(self->extra->children[index]);
1093 return self->extra->children[index];
1094}
1095
1096static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001097element_insert(ElementObject* self, PyObject* args)
1098{
1099 int i;
1100
1101 int index;
1102 PyObject* element;
1103 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1104 &Element_Type, &element))
1105 return NULL;
1106
1107 if (!self->extra)
1108 element_new_extra(self, NULL);
1109
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001110 if (index < 0) {
1111 index += self->extra->length;
1112 if (index < 0)
1113 index = 0;
1114 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001115 if (index > self->extra->length)
1116 index = self->extra->length;
1117
1118 if (element_resize(self, 1) < 0)
1119 return NULL;
1120
1121 for (i = self->extra->length; i > index; i--)
1122 self->extra->children[i] = self->extra->children[i-1];
1123
1124 Py_INCREF(element);
1125 self->extra->children[index] = element;
1126
1127 self->extra->length++;
1128
1129 Py_RETURN_NONE;
1130}
1131
1132static PyObject*
1133element_items(ElementObject* self, PyObject* args)
1134{
1135 if (!PyArg_ParseTuple(args, ":items"))
1136 return NULL;
1137
1138 if (!self->extra || self->extra->attrib == Py_None)
1139 return PyList_New(0);
1140
1141 return PyDict_Items(self->extra->attrib);
1142}
1143
1144static PyObject*
1145element_keys(ElementObject* self, PyObject* args)
1146{
1147 if (!PyArg_ParseTuple(args, ":keys"))
1148 return NULL;
1149
1150 if (!self->extra || self->extra->attrib == Py_None)
1151 return PyList_New(0);
1152
1153 return PyDict_Keys(self->extra->attrib);
1154}
1155
Martin v. Löwis18e16552006-02-15 17:27:45 +00001156static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001157element_length(ElementObject* self)
1158{
1159 if (!self->extra)
1160 return 0;
1161
1162 return self->extra->length;
1163}
1164
1165static PyObject*
1166element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1167{
1168 PyObject* elem;
1169
1170 PyObject* tag;
1171 PyObject* attrib;
1172 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1173 return NULL;
1174
1175 attrib = PyDict_Copy(attrib);
1176 if (!attrib)
1177 return NULL;
1178
1179 elem = element_new(tag, attrib);
1180
1181 Py_DECREF(attrib);
1182
1183 return elem;
1184}
1185
1186static PyObject*
1187element_reduce(ElementObject* self, PyObject* args)
1188{
1189 if (!PyArg_ParseTuple(args, ":__reduce__"))
1190 return NULL;
1191
1192 /* Hack alert: This method is used to work around a __copy__
1193 problem on certain 2.3 and 2.4 versions. To save time and
1194 simplify the code, we create the copy in here, and use a dummy
1195 copyelement helper to trick the copy module into doing the
1196 right thing. */
1197
1198 if (!elementtree_copyelement_obj) {
1199 PyErr_SetString(
1200 PyExc_RuntimeError,
1201 "copyelement helper not found"
1202 );
1203 return NULL;
1204 }
1205
1206 return Py_BuildValue(
1207 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1208 );
1209}
1210
1211static PyObject*
1212element_remove(ElementObject* self, PyObject* args)
1213{
1214 int i;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001215 int rc;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001216 PyObject* element;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001217 PyObject* found;
1218
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1220 return NULL;
1221
1222 if (!self->extra) {
1223 /* element has no children, so raise exception */
1224 PyErr_SetString(
1225 PyExc_ValueError,
1226 "list.remove(x): x not in list"
1227 );
1228 return NULL;
1229 }
1230
1231 for (i = 0; i < self->extra->length; i++) {
1232 if (self->extra->children[i] == element)
1233 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001234 rc = PyObject_Compare(self->extra->children[i], element);
1235 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001236 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001237 if (rc < 0 && PyErr_Occurred())
1238 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001239 }
1240
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001241 if (i >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001242 /* element is not in children, so raise exception */
1243 PyErr_SetString(
1244 PyExc_ValueError,
1245 "list.remove(x): x not in list"
1246 );
1247 return NULL;
1248 }
1249
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001250 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001251
1252 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001253 for (; i < self->extra->length; i++)
1254 self->extra->children[i] = self->extra->children[i+1];
1255
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001256 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001257 Py_RETURN_NONE;
1258}
1259
1260static PyObject*
1261element_repr(ElementObject* self)
1262{
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001263 int status;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001264
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001265 if (self->tag == NULL)
1266 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001267
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001268 status = Py_ReprEnter((PyObject *)self);
1269 if (status == 0) {
1270 PyObject *repr, *tag;
1271 tag = PyObject_Repr(self->tag);
Stéphane Wirtel41af9422017-06-12 15:30:48 +02001272 if (!tag) {
1273 Py_ReprLeave((PyObject *)self);
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001274 return NULL;
Stéphane Wirtel41af9422017-06-12 15:30:48 +02001275 }
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001276
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001277 repr = PyString_FromFormat("<Element %s at %p>",
1278 PyString_AS_STRING(tag), self);
Benjamin Petersond7324bc2016-12-03 11:30:04 -08001279 Py_ReprLeave((PyObject *)self);
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001280 Py_DECREF(tag);
1281 return repr;
1282 }
1283 if (status > 0)
1284 PyErr_Format(PyExc_RuntimeError,
1285 "reentrant call inside %s.__repr__",
1286 Py_TYPE(self)->tp_name);
1287 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001288}
1289
1290static PyObject*
1291element_set(ElementObject* self, PyObject* args)
1292{
1293 PyObject* attrib;
1294
1295 PyObject* key;
1296 PyObject* value;
1297 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1298 return NULL;
1299
1300 if (!self->extra)
1301 element_new_extra(self, NULL);
1302
1303 attrib = element_get_attrib(self);
1304 if (!attrib)
1305 return NULL;
1306
1307 if (PyDict_SetItem(attrib, key, value) < 0)
1308 return NULL;
1309
1310 Py_RETURN_NONE;
1311}
1312
1313static int
Serhiy Storchakab5b76c32015-11-26 11:21:47 +02001314element_setitem(PyObject* self_, Py_ssize_t index_, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001315{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001316 ElementObject* self = (ElementObject*) self_;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001317 int i, index;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001318 PyObject* old;
1319
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001320 if (!self->extra || index_ < 0 || index_ >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001321 PyErr_SetString(
1322 PyExc_IndexError,
1323 "child assignment index out of range");
1324 return -1;
1325 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001326 index = (int)index_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001327
1328 old = self->extra->children[index];
1329
1330 if (item) {
1331 Py_INCREF(item);
1332 self->extra->children[index] = item;
1333 } else {
1334 self->extra->length--;
1335 for (i = index; i < self->extra->length; i++)
1336 self->extra->children[i] = self->extra->children[i+1];
1337 }
1338
1339 Py_DECREF(old);
1340
1341 return 0;
1342}
1343
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001344static PyObject*
1345element_subscr(PyObject* self_, PyObject* item)
1346{
1347 ElementObject* self = (ElementObject*) self_;
1348
1349#if (PY_VERSION_HEX < 0x02050000)
Serhiy Storchaka48c8bf22018-07-31 09:09:36 +03001350 if (_PyAnyInt_Check(item)) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001351 long i = PyInt_AsLong(item);
1352#else
1353 if (PyIndex_Check(item)) {
1354 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1355#endif
1356
1357 if (i == -1 && PyErr_Occurred()) {
1358 return NULL;
1359 }
1360 if (i < 0 && self->extra)
1361 i += self->extra->length;
1362 return element_getitem(self_, i);
1363 }
1364 else if (PySlice_Check(item)) {
1365 Py_ssize_t start, stop, step, slicelen, cur, i;
1366 PyObject* list;
1367
1368 if (!self->extra)
1369 return PyList_New(0);
1370
Serhiy Storchaka5e793212017-04-15 20:11:12 +03001371 if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001372 return NULL;
1373 }
Serhiy Storchakae41390a2017-04-08 11:48:57 +03001374 slicelen = _PySlice_AdjustIndices(self->extra->length, &start, &stop,
1375 step);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001376
1377 if (slicelen <= 0)
1378 return PyList_New(0);
1379 else {
1380 list = PyList_New(slicelen);
1381 if (!list)
1382 return NULL;
1383
1384 for (cur = start, i = 0; i < slicelen;
1385 cur += step, i++) {
1386 PyObject* item = self->extra->children[cur];
1387 Py_INCREF(item);
1388 PyList_SET_ITEM(list, i, item);
1389 }
1390
1391 return list;
1392 }
1393 }
1394 else {
1395 PyErr_SetString(PyExc_TypeError,
1396 "element indices must be integers");
1397 return NULL;
1398 }
1399}
1400
1401static int
1402element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1403{
1404 ElementObject* self = (ElementObject*) self_;
1405
1406#if (PY_VERSION_HEX < 0x02050000)
Serhiy Storchaka48c8bf22018-07-31 09:09:36 +03001407 if (_PyAnyInt_Check(item)) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001408 long i = PyInt_AsLong(item);
1409#else
1410 if (PyIndex_Check(item)) {
1411 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1412#endif
1413
1414 if (i == -1 && PyErr_Occurred()) {
1415 return -1;
1416 }
1417 if (i < 0 && self->extra)
1418 i += self->extra->length;
1419 return element_setitem(self_, i, value);
1420 }
1421 else if (PySlice_Check(item)) {
1422 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1423
1424 PyObject* recycle = NULL;
1425 PyObject* seq = NULL;
1426
1427 if (!self->extra)
1428 element_new_extra(self, NULL);
1429
Serhiy Storchaka5e793212017-04-15 20:11:12 +03001430 if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001431 return -1;
1432 }
Serhiy Storchakae41390a2017-04-08 11:48:57 +03001433 slicelen = _PySlice_AdjustIndices(self->extra->length, &start, &stop,
1434 step);
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001435 assert(slicelen <= self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001436
1437 if (value == NULL)
1438 newlen = 0;
1439 else {
1440 seq = PySequence_Fast(value, "");
1441 if (!seq) {
1442 PyErr_Format(
1443 PyExc_TypeError,
1444 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1445 );
1446 return -1;
1447 }
1448 newlen = PySequence_Size(seq);
1449 }
1450
1451 if (step != 1 && newlen != slicelen)
1452 {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001453 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001454 PyErr_Format(PyExc_ValueError,
1455#if (PY_VERSION_HEX < 0x02050000)
1456 "attempt to assign sequence of size %d "
1457 "to extended slice of size %d",
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001458 (int)newlen, (int)slicelen
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001459#else
1460 "attempt to assign sequence of size %zd "
1461 "to extended slice of size %zd",
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001462 newlen, slicelen
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001463#endif
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001464 );
1465 return -1;
1466 }
1467
1468
1469 /* Resize before creating the recycle bin, to prevent refleaks. */
1470 if (newlen > slicelen) {
1471 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001472 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001473 return -1;
1474 }
1475 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001476 assert(newlen - slicelen <= INT_MAX - self->extra->length);
1477 assert(newlen - slicelen >= -self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001478
1479 if (slicelen > 0) {
1480 /* to avoid recursive calls to this method (via decref), move
1481 old items to the recycle bin here, and get rid of them when
1482 we're done modifying the element */
1483 recycle = PyList_New(slicelen);
1484 if (!recycle) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001485 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001486 return -1;
1487 }
1488 for (cur = start, i = 0; i < slicelen;
1489 cur += step, i++)
1490 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1491 }
1492
1493 if (newlen < slicelen) {
1494 /* delete slice */
1495 for (i = stop; i < self->extra->length; i++)
1496 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1497 } else if (newlen > slicelen) {
1498 /* insert slice */
1499 for (i = self->extra->length-1; i >= stop; i--)
1500 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1501 }
1502
1503 /* replace the slice */
1504 for (cur = start, i = 0; i < newlen;
1505 cur += step, i++) {
1506 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1507 Py_INCREF(element);
1508 self->extra->children[cur] = element;
1509 }
1510
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001511 self->extra->length += (int)(newlen - slicelen);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001512
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001513 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001514
1515 /* discard the recycle bin, and everything in it */
1516 Py_XDECREF(recycle);
1517
1518 return 0;
1519 }
1520 else {
1521 PyErr_SetString(PyExc_TypeError,
1522 "element indices must be integers");
1523 return -1;
1524 }
1525}
1526
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001527static PyMethodDef element_methods[] = {
1528
1529 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1530
1531 {"get", (PyCFunction) element_get, METH_VARARGS},
1532 {"set", (PyCFunction) element_set, METH_VARARGS},
1533
1534 {"find", (PyCFunction) element_find, METH_VARARGS},
1535 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1536 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1537
1538 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001539 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001540 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1541 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1542
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001543 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1544 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1545 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1546
Serhiy Storchaka09b52472017-05-17 10:08:11 +03001547 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001548 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1549
1550 {"items", (PyCFunction) element_items, METH_VARARGS},
1551 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1552
1553 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1554
1555 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1556 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1557
1558 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1559 C objects correctly, so we have to fake it using a __reduce__-
1560 based hack (see the element_reduce implementation above for
1561 details). */
1562
1563 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1564 using a runtime test to figure out if we need to fake things
1565 or now (see the init code below). The following entry is
1566 enabled only if the hack is needed. */
1567
1568 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1569
1570 {NULL, NULL}
1571};
1572
1573static PyObject*
1574element_getattr(ElementObject* self, char* name)
1575{
1576 PyObject* res;
1577
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001578 /* handle common attributes first */
1579 if (strcmp(name, "tag") == 0) {
1580 res = self->tag;
1581 Py_INCREF(res);
1582 return res;
1583 } else if (strcmp(name, "text") == 0) {
1584 res = element_get_text(self);
Xiang Zhang827c7832017-03-22 12:25:51 +08001585 Py_XINCREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001586 return res;
1587 }
1588
1589 /* methods */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001590 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1591 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001592 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001593
1594 PyErr_Clear();
1595
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001596 /* less common attributes */
1597 if (strcmp(name, "tail") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001598 res = element_get_tail(self);
1599 } else if (strcmp(name, "attrib") == 0) {
1600 if (!self->extra)
1601 element_new_extra(self, NULL);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001602 res = element_get_attrib(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001603 } else {
1604 PyErr_SetString(PyExc_AttributeError, name);
1605 return NULL;
1606 }
1607
1608 if (!res)
1609 return NULL;
1610
1611 Py_INCREF(res);
1612 return res;
1613}
1614
1615static int
1616element_setattr(ElementObject* self, const char* name, PyObject* value)
1617{
1618 if (value == NULL) {
1619 PyErr_SetString(
1620 PyExc_AttributeError,
1621 "can't delete element attributes"
1622 );
1623 return -1;
1624 }
1625
1626 if (strcmp(name, "tag") == 0) {
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001627 Py_INCREF(value);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001628 Py_SETREF(self->tag, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001629 } else if (strcmp(name, "text") == 0) {
Oren Milmanf15058a2017-10-11 16:29:12 +03001630 Py_INCREF(value);
1631 _set_joined_ptr(&self->text, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001632 } else if (strcmp(name, "tail") == 0) {
Oren Milmanf15058a2017-10-11 16:29:12 +03001633 Py_INCREF(value);
1634 _set_joined_ptr(&self->tail, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001635 } else if (strcmp(name, "attrib") == 0) {
1636 if (!self->extra)
1637 element_new_extra(self, NULL);
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001638 Py_INCREF(value);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001639 Py_SETREF(self->extra->attrib, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001640 } else {
1641 PyErr_SetString(PyExc_AttributeError, name);
1642 return -1;
1643 }
1644
1645 return 0;
1646}
1647
1648static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001649 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001650 0, /* sq_concat */
1651 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001652 element_getitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001653 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001654 element_setitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001655 0,
1656};
1657
1658static PyMappingMethods element_as_mapping = {
1659 (lenfunc) element_length,
1660 (binaryfunc) element_subscr,
1661 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001662};
1663
1664statichere PyTypeObject Element_Type = {
Benjamin Petersona72d15c2017-09-13 21:20:29 -07001665 PyVarObject_HEAD_INIT(NULL, 0)
1666 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001667 /* methods */
1668 (destructor)element_dealloc, /* tp_dealloc */
1669 0, /* tp_print */
1670 (getattrfunc)element_getattr, /* tp_getattr */
1671 (setattrfunc)element_setattr, /* tp_setattr */
1672 0, /* tp_compare */
1673 (reprfunc)element_repr, /* tp_repr */
1674 0, /* tp_as_number */
1675 &element_as_sequence, /* tp_as_sequence */
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001676 &element_as_mapping, /* tp_as_mapping */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001677};
1678
1679/* ==================================================================== */
1680/* the tree builder type */
1681
1682typedef struct {
1683 PyObject_HEAD
1684
1685 PyObject* root; /* root node (first created node) */
1686
1687 ElementObject* this; /* current node */
1688 ElementObject* last; /* most recently created node */
1689
1690 PyObject* data; /* data collector (string or list), or NULL */
1691
1692 PyObject* stack; /* element stack */
Neal Norwitzc7074382006-06-12 02:06:17 +00001693 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001694
1695 /* element tracing */
1696 PyObject* events; /* list of events, or NULL if not collecting */
1697 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1698 PyObject* end_event_obj;
1699 PyObject* start_ns_event_obj;
1700 PyObject* end_ns_event_obj;
1701
1702} TreeBuilderObject;
1703
1704staticforward PyTypeObject TreeBuilder_Type;
1705
Christian Heimese93237d2007-12-19 02:37:44 +00001706#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001707
1708/* -------------------------------------------------------------------- */
1709/* constructor and destructor */
1710
1711LOCAL(PyObject*)
1712treebuilder_new(void)
1713{
1714 TreeBuilderObject* self;
1715
1716 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1717 if (self == NULL)
1718 return NULL;
1719
1720 self->root = NULL;
1721
1722 Py_INCREF(Py_None);
1723 self->this = (ElementObject*) Py_None;
1724
1725 Py_INCREF(Py_None);
1726 self->last = (ElementObject*) Py_None;
1727
1728 self->data = NULL;
1729
1730 self->stack = PyList_New(20);
1731 self->index = 0;
1732
1733 self->events = NULL;
1734 self->start_event_obj = self->end_event_obj = NULL;
1735 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1736
1737 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1738
1739 return (PyObject*) self;
1740}
1741
1742static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00001743treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001744{
1745 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1746 return NULL;
1747
1748 return treebuilder_new();
1749}
1750
1751static void
1752treebuilder_dealloc(TreeBuilderObject* self)
1753{
1754 Py_XDECREF(self->end_ns_event_obj);
1755 Py_XDECREF(self->start_ns_event_obj);
1756 Py_XDECREF(self->end_event_obj);
1757 Py_XDECREF(self->start_event_obj);
1758 Py_XDECREF(self->events);
1759 Py_DECREF(self->stack);
1760 Py_XDECREF(self->data);
1761 Py_DECREF(self->last);
1762 Py_DECREF(self->this);
1763 Py_XDECREF(self->root);
1764
1765 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1766
1767 PyObject_Del(self);
1768}
1769
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +03001770/* -------------------------------------------------------------------- */
1771/* helpers for handling of arbitrary element-like objects */
1772
1773static void
1774treebuilder_set_element_text_or_tail(PyObject **data, PyObject **dest)
1775{
1776 PyObject *tmp = JOIN_OBJ(*dest);
1777 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
1778 *data = NULL;
1779 Py_DECREF(tmp);
1780}
1781
1782LOCAL(void)
1783treebuilder_flush_data(TreeBuilderObject* self)
1784{
1785 ElementObject *element = self->last;
1786
1787 if (self->data) {
1788 if (self->this == element) {
1789 treebuilder_set_element_text_or_tail(
1790 &self->data,
1791 &element->text);
1792 }
1793 else {
1794 treebuilder_set_element_text_or_tail(
1795 &self->data,
1796 &element->tail);
1797 }
1798 }
1799}
1800
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001801LOCAL(int)
1802treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
1803 PyObject *node)
1804{
1805 if (action != NULL) {
1806 PyObject *res = PyTuple_Pack(2, action, node);
1807 if (res == NULL)
1808 return -1;
1809 if (PyList_Append(self->events, res) < 0) {
1810 Py_DECREF(res);
1811 return -1;
1812 }
1813 Py_DECREF(res);
1814 }
1815 return 0;
1816}
1817
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001818/* -------------------------------------------------------------------- */
1819/* handlers */
1820
1821LOCAL(PyObject*)
1822treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1823 PyObject* standalone)
1824{
1825 Py_RETURN_NONE;
1826}
1827
1828LOCAL(PyObject*)
1829treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1830 PyObject* attrib)
1831{
1832 PyObject* node;
1833 PyObject* this;
1834
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +03001835 treebuilder_flush_data(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001836
1837 node = element_new(tag, attrib);
1838 if (!node)
1839 return NULL;
1840
1841 this = (PyObject*) self->this;
1842
1843 if (this != Py_None) {
1844 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001845 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001846 } else {
1847 if (self->root) {
1848 PyErr_SetString(
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001849 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001850 "multiple elements on top level"
1851 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001852 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001853 }
1854 Py_INCREF(node);
1855 self->root = node;
1856 }
1857
1858 if (self->index < PyList_GET_SIZE(self->stack)) {
1859 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001860 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001861 Py_INCREF(this);
1862 } else {
1863 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001864 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001865 }
1866 self->index++;
1867
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001868 Py_INCREF(node);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001869 Py_SETREF(self->this, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001870
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001871 Py_INCREF(node);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001872 Py_SETREF(self->last, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001873
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001874 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
1875 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001876
1877 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001878
1879 error:
1880 Py_DECREF(node);
1881 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001882}
1883
1884LOCAL(PyObject*)
1885treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1886{
1887 if (!self->data) {
Fredrik Lundhdc075b92006-08-16 16:47:07 +00001888 if (self->last == (ElementObject*) Py_None) {
1889 /* ignore calls to data before the first call to start */
1890 Py_RETURN_NONE;
1891 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001892 /* store the first item as is */
1893 Py_INCREF(data); self->data = data;
1894 } else {
1895 /* more than one item; use a list to collect items */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001896 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1897 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001898 /* expat often generates single character data sections; handle
1899 the most common case by resizing the existing string... */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001900 Py_ssize_t size = PyString_GET_SIZE(self->data);
1901 if (_PyString_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001902 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001903 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001904 } else if (PyList_CheckExact(self->data)) {
1905 if (PyList_Append(self->data, data) < 0)
1906 return NULL;
1907 } else {
1908 PyObject* list = PyList_New(2);
1909 if (!list)
1910 return NULL;
1911 PyList_SET_ITEM(list, 0, self->data);
1912 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1913 self->data = list;
1914 }
1915 }
1916
1917 Py_RETURN_NONE;
1918}
1919
1920LOCAL(PyObject*)
1921treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1922{
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001923 ElementObject *item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001924
Serhiy Storchaka9c2c42c2017-04-02 20:37:03 +03001925 treebuilder_flush_data(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001926
1927 if (self->index == 0) {
1928 PyErr_SetString(
1929 PyExc_IndexError,
1930 "pop from empty stack"
1931 );
1932 return NULL;
1933 }
1934
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001935 item = self->last;
1936 self->last = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001937 self->index--;
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001938 self->this = (ElementObject *) PyList_GET_ITEM(self->stack, self->index);
1939 Py_INCREF(self->this);
1940 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001941
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001942 if (treebuilder_append_event(self, self->end_event_obj, (PyObject*)self->last) < 0)
1943 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001944
1945 Py_INCREF(self->last);
1946 return (PyObject*) self->last;
1947}
1948
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001949/* -------------------------------------------------------------------- */
1950/* methods (in alphabetical order) */
1951
1952static PyObject*
1953treebuilder_data(TreeBuilderObject* self, PyObject* args)
1954{
1955 PyObject* data;
1956 if (!PyArg_ParseTuple(args, "O:data", &data))
1957 return NULL;
1958
1959 return treebuilder_handle_data(self, data);
1960}
1961
1962static PyObject*
1963treebuilder_end(TreeBuilderObject* self, PyObject* args)
1964{
1965 PyObject* tag;
1966 if (!PyArg_ParseTuple(args, "O:end", &tag))
1967 return NULL;
1968
1969 return treebuilder_handle_end(self, tag);
1970}
1971
1972LOCAL(PyObject*)
1973treebuilder_done(TreeBuilderObject* self)
1974{
1975 PyObject* res;
1976
1977 /* FIXME: check stack size? */
1978
1979 if (self->root)
1980 res = self->root;
1981 else
1982 res = Py_None;
1983
1984 Py_INCREF(res);
1985 return res;
1986}
1987
1988static PyObject*
1989treebuilder_close(TreeBuilderObject* self, PyObject* args)
1990{
1991 if (!PyArg_ParseTuple(args, ":close"))
1992 return NULL;
1993
1994 return treebuilder_done(self);
1995}
1996
1997static PyObject*
1998treebuilder_start(TreeBuilderObject* self, PyObject* args)
1999{
2000 PyObject* tag;
2001 PyObject* attrib = Py_None;
2002 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2003 return NULL;
2004
2005 return treebuilder_handle_start(self, tag, attrib);
2006}
2007
2008static PyObject*
2009treebuilder_xml(TreeBuilderObject* self, PyObject* args)
2010{
2011 PyObject* encoding;
2012 PyObject* standalone;
2013 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
2014 return NULL;
2015
2016 return treebuilder_handle_xml(self, encoding, standalone);
2017}
2018
2019static PyMethodDef treebuilder_methods[] = {
2020 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2021 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2022 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
2023 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
2024 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2025 {NULL, NULL}
2026};
2027
2028static PyObject*
2029treebuilder_getattr(TreeBuilderObject* self, char* name)
2030{
2031 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
2032}
2033
2034statichere PyTypeObject TreeBuilder_Type = {
Benjamin Petersona72d15c2017-09-13 21:20:29 -07002035 PyVarObject_HEAD_INIT(NULL, 0)
2036 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002037 /* methods */
2038 (destructor)treebuilder_dealloc, /* tp_dealloc */
2039 0, /* tp_print */
2040 (getattrfunc)treebuilder_getattr, /* tp_getattr */
2041};
2042
2043/* ==================================================================== */
2044/* the expat interface */
2045
2046#if defined(USE_EXPAT)
2047
2048#include "expat.h"
2049
/* EXPAT(func) names an expat entry point: either a member of the dispatch
   table exported by pyexpat, or the XML_-prefixed library symbol. */
#ifdef USE_PYEXPAT_CAPI
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
2057
2058typedef struct {
2059 PyObject_HEAD
2060
2061 XML_Parser parser;
2062
2063 PyObject* target;
2064 PyObject* entity;
2065
2066 PyObject* names;
2067
2068 PyObject* handle_xml;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002069
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002070 PyObject* handle_start;
2071 PyObject* handle_data;
2072 PyObject* handle_end;
2073
2074 PyObject* handle_comment;
2075 PyObject* handle_pi;
2076
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002077 PyObject* handle_close;
2078
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002079} XMLParserObject;
2080
2081staticforward PyTypeObject XMLParser_Type;
2082
2083/* helpers */
2084
#if defined(Py_USING_UNICODE)
/* Return 1 if any of the first `size` bytes of `string` has its high bit
   set (i.e. the 8-bit string is not plain 7-bit ASCII), else 0. */
LOCAL(int)
checkstring(const char* string, int size)
{
    const char* cursor = string;
    int remaining = size;

    while (remaining-- > 0) {
        if (*cursor++ & 0x80)
            return 1;
    }

    return 0;
}
#endif
2099
2100LOCAL(PyObject*)
2101makestring(const char* string, int size)
2102{
2103 /* convert a UTF-8 string to either a 7-bit ascii string or a
2104 Unicode string */
2105
2106#if defined(Py_USING_UNICODE)
2107 if (checkstring(string, size))
2108 return PyUnicode_DecodeUTF8(string, size, "strict");
2109#endif
2110
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002111 return PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002112}
2113
2114LOCAL(PyObject*)
2115makeuniversal(XMLParserObject* self, const char* string)
2116{
2117 /* convert a UTF-8 tag/attribute name from the expat parser
2118 to a universal name string */
2119
2120 int size = strlen(string);
2121 PyObject* key;
2122 PyObject* value;
2123
2124 /* look the 'raw' name up in the names dictionary */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002125 key = PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002126 if (!key)
2127 return NULL;
2128
2129 value = PyDict_GetItem(self->names, key);
2130
2131 if (value) {
2132 Py_INCREF(value);
2133 } else {
2134 /* new name. convert to universal name, and decode as
2135 necessary */
2136
2137 PyObject* tag;
2138 char* p;
2139 int i;
2140
2141 /* look for namespace separator */
2142 for (i = 0; i < size; i++)
2143 if (string[i] == '}')
2144 break;
2145 if (i != size) {
2146 /* convert to universal name */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002147 tag = PyString_FromStringAndSize(NULL, size+1);
2148 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002149 p[0] = '{';
2150 memcpy(p+1, string, size);
2151 size++;
2152 } else {
2153 /* plain name; use key as tag */
2154 Py_INCREF(key);
2155 tag = key;
2156 }
2157
2158 /* decode universal name */
2159#if defined(Py_USING_UNICODE)
2160 /* inline makestring, to avoid duplicating the source string if
Martin Panter6a8163a2016-04-15 02:14:19 +00002161 it's not a utf-8 string */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002162 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002163 if (checkstring(p, size)) {
2164 value = PyUnicode_DecodeUTF8(p, size, "strict");
2165 Py_DECREF(tag);
2166 if (!value) {
2167 Py_DECREF(key);
2168 return NULL;
2169 }
2170 } else
2171#endif
2172 value = tag; /* use tag as is */
2173
2174 /* add to names dictionary */
2175 if (PyDict_SetItem(self->names, key, value) < 0) {
2176 Py_DECREF(key);
2177 Py_DECREF(value);
2178 return NULL;
2179 }
2180 }
2181
2182 Py_DECREF(key);
2183 return value;
2184}
2185
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002186static void
2187expat_set_error(const char* message, int line, int column)
2188{
2189 PyObject *error;
2190 PyObject *position;
2191 char buffer[256];
2192
2193 sprintf(buffer, "%s: line %d, column %d", message, line, column);
2194
2195 error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
2196 if (!error)
2197 return;
2198
2199 /* add position attribute */
2200 position = Py_BuildValue("(ii)", line, column);
2201 if (!position) {
2202 Py_DECREF(error);
2203 return;
2204 }
2205 if (PyObject_SetAttrString(error, "position", position) == -1) {
2206 Py_DECREF(error);
2207 Py_DECREF(position);
2208 return;
2209 }
2210 Py_DECREF(position);
2211
2212 PyErr_SetObject(elementtree_parseerror_obj, error);
2213 Py_DECREF(error);
2214}
2215
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002216/* -------------------------------------------------------------------- */
2217/* handlers */
2218
2219static void
2220expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2221 int data_len)
2222{
2223 PyObject* key;
2224 PyObject* value;
2225 PyObject* res;
2226
2227 if (data_len < 2 || data_in[0] != '&')
2228 return;
2229
2230 key = makestring(data_in + 1, data_len - 2);
2231 if (!key)
2232 return;
2233
2234 value = PyDict_GetItem(self->entity, key);
2235
2236 if (value) {
2237 if (TreeBuilder_CheckExact(self->target))
2238 res = treebuilder_handle_data(
2239 (TreeBuilderObject*) self->target, value
2240 );
2241 else if (self->handle_data)
2242 res = PyObject_CallFunction(self->handle_data, "O", value);
2243 else
2244 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002245 Py_XDECREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002246 } else if (!PyErr_Occurred()) {
2247 /* Report the first error, not the last */
2248 char message[128];
2249 sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key));
2250 expat_set_error(
2251 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002252 EXPAT(GetErrorLineNumber)(self->parser),
2253 EXPAT(GetErrorColumnNumber)(self->parser)
2254 );
2255 }
2256
2257 Py_DECREF(key);
2258}
2259
2260static void
2261expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2262 const XML_Char **attrib_in)
2263{
2264 PyObject* res;
2265 PyObject* tag;
2266 PyObject* attrib;
2267 int ok;
2268
2269 /* tag name */
2270 tag = makeuniversal(self, tag_in);
2271 if (!tag)
2272 return; /* parser will look for errors */
2273
2274 /* attributes */
2275 if (attrib_in[0]) {
2276 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002277 if (!attrib) {
2278 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002279 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002280 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002281 while (attrib_in[0] && attrib_in[1]) {
2282 PyObject* key = makeuniversal(self, attrib_in[0]);
2283 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2284 if (!key || !value) {
2285 Py_XDECREF(value);
2286 Py_XDECREF(key);
2287 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002288 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002289 return;
2290 }
2291 ok = PyDict_SetItem(attrib, key, value);
2292 Py_DECREF(value);
2293 Py_DECREF(key);
2294 if (ok < 0) {
2295 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002296 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002297 return;
2298 }
2299 attrib_in += 2;
2300 }
2301 } else {
2302 Py_INCREF(Py_None);
2303 attrib = Py_None;
2304 }
2305
2306 if (TreeBuilder_CheckExact(self->target))
2307 /* shortcut */
2308 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2309 tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002310 else if (self->handle_start) {
2311 if (attrib == Py_None) {
2312 Py_DECREF(attrib);
2313 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002314 if (!attrib) {
2315 Py_DECREF(tag);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002316 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002317 }
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002318 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002319 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002320 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002321 res = NULL;
2322
2323 Py_DECREF(tag);
2324 Py_DECREF(attrib);
2325
2326 Py_XDECREF(res);
2327}
2328
2329static void
2330expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2331 int data_len)
2332{
2333 PyObject* data;
2334 PyObject* res;
2335
2336 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002337 if (!data)
2338 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002339
2340 if (TreeBuilder_CheckExact(self->target))
2341 /* shortcut */
2342 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2343 else if (self->handle_data)
2344 res = PyObject_CallFunction(self->handle_data, "O", data);
2345 else
2346 res = NULL;
2347
2348 Py_DECREF(data);
2349
2350 Py_XDECREF(res);
2351}
2352
2353static void
2354expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2355{
2356 PyObject* tag;
2357 PyObject* res = NULL;
2358
2359 if (TreeBuilder_CheckExact(self->target))
2360 /* shortcut */
2361 /* the standard tree builder doesn't look at the end tag */
2362 res = treebuilder_handle_end(
2363 (TreeBuilderObject*) self->target, Py_None
2364 );
2365 else if (self->handle_end) {
2366 tag = makeuniversal(self, tag_in);
2367 if (tag) {
2368 res = PyObject_CallFunction(self->handle_end, "O", tag);
2369 Py_DECREF(tag);
2370 }
2371 }
2372
2373 Py_XDECREF(res);
2374}
2375
2376static void
2377expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2378 const XML_Char *uri)
2379{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002380 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2381 PyObject *parcel;
2382 PyObject *sprefix = NULL;
2383 PyObject *suri = NULL;
2384
2385 if (PyErr_Occurred())
2386 return;
2387
2388 if (!target->events || !target->start_ns_event_obj)
2389 return;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002390
Eli Benderskyf933e082013-11-28 06:25:45 -08002391 if (uri)
Eli Bendersky71142c42013-11-28 06:37:25 -08002392 suri = makestring(uri, strlen(uri));
Eli Benderskyf933e082013-11-28 06:25:45 -08002393 else
Eli Bendersky71142c42013-11-28 06:37:25 -08002394 suri = PyString_FromStringAndSize("", 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002395 if (!suri)
2396 return;
2397
2398 if (prefix)
2399 sprefix = makestring(prefix, strlen(prefix));
2400 else
2401 sprefix = PyString_FromStringAndSize("", 0);
2402 if (!sprefix) {
2403 Py_DECREF(suri);
2404 return;
2405 }
2406
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002407 parcel = PyTuple_Pack(2, sprefix, suri);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002408 Py_DECREF(sprefix);
2409 Py_DECREF(suri);
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002410 if (!parcel)
2411 return;
2412 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
2413 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002414}
2415
2416static void
2417expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2418{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002419 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2420
2421 if (PyErr_Occurred())
2422 return;
2423
2424 if (!target->events)
2425 return;
2426
2427 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002428}
2429
2430static void
2431expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2432{
2433 PyObject* comment;
2434 PyObject* res;
2435
2436 if (self->handle_comment) {
2437 comment = makestring(comment_in, strlen(comment_in));
2438 if (comment) {
2439 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2440 Py_XDECREF(res);
2441 Py_DECREF(comment);
2442 }
2443 }
2444}
2445
2446static void
2447expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2448 const XML_Char* data_in)
2449{
2450 PyObject* target;
2451 PyObject* data;
2452 PyObject* res;
2453
2454 if (self->handle_pi) {
2455 target = makestring(target_in, strlen(target_in));
2456 data = makestring(data_in, strlen(data_in));
2457 if (target && data) {
2458 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2459 Py_XDECREF(res);
2460 Py_DECREF(data);
2461 Py_DECREF(target);
2462 } else {
2463 Py_XDECREF(data);
2464 Py_XDECREF(target);
2465 }
2466 }
2467}
2468
#if defined(Py_USING_UNICODE)
/* expat unknown-encoding handler.

   Builds the byte -> code point table for the encoding `name` by
   decoding the 256 byte values with Python's codec machinery.  Bytes
   that decode to the replacement character are mapped to -1.
   Encodings for which the 256 bytes do not decode to exactly 256
   characters are rejected with ValueError. */
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    PyObject* decoded;
    Py_UNICODE* chars;
    unsigned char all_bytes[256];
    int i;

    memset(info, 0, sizeof(XML_Encoding));

    /* one of every possible byte value, in order */
    for (i = 0; i < 256; i++)
        all_bytes[i] = i;

    decoded = PyUnicode_Decode((char*) all_bytes, 256, name, "replace");
    if (!decoded)
        return XML_STATUS_ERROR;

    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        PyErr_SetString(PyExc_ValueError,
                        "multi-byte encodings are not supported");
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (i = 0; i < 256; i++) {
        if (chars[i] == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[i] = -1;
        else
            info->map[i] = chars[i];
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2509
2510/* -------------------------------------------------------------------- */
2511/* constructor and destructor */
2512
scoder0694b6a2018-03-24 06:56:41 +01002513static int
2514ignore_attribute_error(PyObject *value)
2515{
2516 if (value == NULL) {
2517 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
2518 return -1;
2519 }
2520 PyErr_Clear();
2521 }
2522 return 0;
2523}
2524
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002525static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00002526xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002527{
2528 XMLParserObject* self;
2529 /* FIXME: does this need to be static? */
2530 static XML_Memory_Handling_Suite memory_handler;
2531
2532 PyObject* target = NULL;
2533 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002534 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002535 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2536 &target, &encoding))
2537 return NULL;
2538
2539#if defined(USE_PYEXPAT_CAPI)
2540 if (!expat_capi) {
2541 PyErr_SetString(
2542 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2543 );
2544 return NULL;
2545 }
2546#endif
2547
2548 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2549 if (self == NULL)
2550 return NULL;
2551
2552 self->entity = PyDict_New();
2553 if (!self->entity) {
2554 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002555 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002556 }
2557
2558 self->names = PyDict_New();
2559 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002560 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002561 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002562 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002563 }
2564
2565 memory_handler.malloc_fcn = PyObject_Malloc;
2566 memory_handler.realloc_fcn = PyObject_Realloc;
2567 memory_handler.free_fcn = PyObject_Free;
2568
2569 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2570 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002571 PyObject_Del(self->names);
2572 PyObject_Del(self->entity);
2573 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002574 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002575 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002576 }
2577
scoderc498cd82018-03-31 14:23:30 +02002578 ALLOC(sizeof(XMLParserObject), "create expatparser");
2579
2580 /* Init to NULL to keep the error handling below manageable. */
2581 self->target =
2582 self->handle_xml =
2583 self->handle_start =
2584 self->handle_data =
2585 self->handle_end =
2586 self->handle_comment =
2587 self->handle_pi =
2588 self->handle_close =
2589 NULL;
2590
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002591 /* setup target handlers */
2592 if (!target) {
2593 target = treebuilder_new();
2594 if (!target) {
scoderc498cd82018-03-31 14:23:30 +02002595 Py_DECREF(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002596 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002597 }
2598 } else
2599 Py_INCREF(target);
2600 self->target = target;
2601
2602 self->handle_xml = PyObject_GetAttrString(target, "xml");
scoder0694b6a2018-03-24 06:56:41 +01002603 if (ignore_attribute_error(self->handle_xml)) {
scoderc498cd82018-03-31 14:23:30 +02002604 Py_DECREF(self);
scoder0694b6a2018-03-24 06:56:41 +01002605 return NULL;
2606 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002607 self->handle_start = PyObject_GetAttrString(target, "start");
scoder0694b6a2018-03-24 06:56:41 +01002608 if (ignore_attribute_error(self->handle_start)) {
scoderc498cd82018-03-31 14:23:30 +02002609 Py_DECREF(self);
scoder0694b6a2018-03-24 06:56:41 +01002610 return NULL;
2611 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002612 self->handle_data = PyObject_GetAttrString(target, "data");
scoder0694b6a2018-03-24 06:56:41 +01002613 if (ignore_attribute_error(self->handle_data)) {
scoderc498cd82018-03-31 14:23:30 +02002614 Py_DECREF(self);
scoder0694b6a2018-03-24 06:56:41 +01002615 return NULL;
2616 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002617 self->handle_end = PyObject_GetAttrString(target, "end");
scoder0694b6a2018-03-24 06:56:41 +01002618 if (ignore_attribute_error(self->handle_end)) {
scoderc498cd82018-03-31 14:23:30 +02002619 Py_DECREF(self);
scoder0694b6a2018-03-24 06:56:41 +01002620 return NULL;
2621 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002622 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoder0694b6a2018-03-24 06:56:41 +01002623 if (ignore_attribute_error(self->handle_comment)) {
scoderc498cd82018-03-31 14:23:30 +02002624 Py_DECREF(self);
scoder0694b6a2018-03-24 06:56:41 +01002625 return NULL;
2626 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002627 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoder0694b6a2018-03-24 06:56:41 +01002628 if (ignore_attribute_error(self->handle_pi)) {
scoderc498cd82018-03-31 14:23:30 +02002629 Py_DECREF(self);
scoder0694b6a2018-03-24 06:56:41 +01002630 return NULL;
2631 }
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002632 self->handle_close = PyObject_GetAttrString(target, "close");
scoder0694b6a2018-03-24 06:56:41 +01002633 if (ignore_attribute_error(self->handle_close)) {
scoderc498cd82018-03-31 14:23:30 +02002634 Py_DECREF(self);
scoder0694b6a2018-03-24 06:56:41 +01002635 return NULL;
2636 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002637
2638 /* configure parser */
2639 EXPAT(SetUserData)(self->parser, self);
2640 EXPAT(SetElementHandler)(
2641 self->parser,
2642 (XML_StartElementHandler) expat_start_handler,
2643 (XML_EndElementHandler) expat_end_handler
2644 );
2645 EXPAT(SetDefaultHandlerExpand)(
2646 self->parser,
2647 (XML_DefaultHandler) expat_default_handler
2648 );
2649 EXPAT(SetCharacterDataHandler)(
2650 self->parser,
2651 (XML_CharacterDataHandler) expat_data_handler
2652 );
2653 if (self->handle_comment)
2654 EXPAT(SetCommentHandler)(
2655 self->parser,
2656 (XML_CommentHandler) expat_comment_handler
2657 );
2658 if (self->handle_pi)
2659 EXPAT(SetProcessingInstructionHandler)(
2660 self->parser,
2661 (XML_ProcessingInstructionHandler) expat_pi_handler
2662 );
2663#if defined(Py_USING_UNICODE)
2664 EXPAT(SetUnknownEncodingHandler)(
2665 self->parser,
2666 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2667 );
2668#endif
2669
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002670 return (PyObject*) self;
2671}
2672
2673static void
2674xmlparser_dealloc(XMLParserObject* self)
2675{
2676 EXPAT(ParserFree)(self->parser);
2677
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002678 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002679 Py_XDECREF(self->handle_pi);
2680 Py_XDECREF(self->handle_comment);
2681 Py_XDECREF(self->handle_end);
2682 Py_XDECREF(self->handle_data);
2683 Py_XDECREF(self->handle_start);
2684 Py_XDECREF(self->handle_xml);
2685
2686 Py_DECREF(self->target);
2687 Py_DECREF(self->entity);
2688 Py_DECREF(self->names);
2689
2690 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2691
2692 PyObject_Del(self);
2693}
2694
2695/* -------------------------------------------------------------------- */
2696/* methods (in alphabetical order) */
2697
2698LOCAL(PyObject*)
2699expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2700{
2701 int ok;
2702
2703 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2704
2705 if (PyErr_Occurred())
2706 return NULL;
2707
2708 if (!ok) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002709 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2711 EXPAT(GetErrorLineNumber)(self->parser),
2712 EXPAT(GetErrorColumnNumber)(self->parser)
2713 );
2714 return NULL;
2715 }
2716
2717 Py_RETURN_NONE;
2718}
2719
2720static PyObject*
2721xmlparser_close(XMLParserObject* self, PyObject* args)
2722{
2723 /* end feeding data to parser */
2724
2725 PyObject* res;
2726 if (!PyArg_ParseTuple(args, ":close"))
2727 return NULL;
2728
2729 res = expat_parse(self, "", 0, 1);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002730 if (!res)
2731 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002732
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002733 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734 Py_DECREF(res);
2735 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002736 } if (self->handle_close) {
2737 Py_DECREF(res);
2738 return PyObject_CallFunction(self->handle_close, "");
2739 } else
2740 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002741}
2742
2743static PyObject*
2744xmlparser_feed(XMLParserObject* self, PyObject* args)
2745{
2746 /* feed data to parser */
2747
2748 char* data;
2749 int data_len;
2750 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2751 return NULL;
2752
2753 return expat_parse(self, data, data_len, 0);
2754}
2755
2756static PyObject*
2757xmlparser_parse(XMLParserObject* self, PyObject* args)
2758{
2759 /* (internal) parse until end of input stream */
2760
2761 PyObject* reader;
2762 PyObject* buffer;
2763 PyObject* res;
2764
2765 PyObject* fileobj;
2766 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2767 return NULL;
2768
2769 reader = PyObject_GetAttrString(fileobj, "read");
2770 if (!reader)
2771 return NULL;
2772
2773 /* read from open file object */
2774 for (;;) {
2775
2776 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2777
2778 if (!buffer) {
2779 /* read failed (e.g. due to KeyboardInterrupt) */
2780 Py_DECREF(reader);
2781 return NULL;
2782 }
2783
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002784 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002785 Py_DECREF(buffer);
2786 break;
2787 }
2788
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002789 if (PyString_GET_SIZE(buffer) > INT_MAX) {
2790 Py_DECREF(buffer);
2791 Py_DECREF(reader);
2792 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
2793 return NULL;
2794 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002795 res = expat_parse(
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002796 self, PyString_AS_STRING(buffer), (int)PyString_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002797 );
2798
2799 Py_DECREF(buffer);
2800
2801 if (!res) {
2802 Py_DECREF(reader);
2803 return NULL;
2804 }
2805 Py_DECREF(res);
2806
2807 }
2808
2809 Py_DECREF(reader);
2810
2811 res = expat_parse(self, "", 0, 1);
2812
2813 if (res && TreeBuilder_CheckExact(self->target)) {
2814 Py_DECREF(res);
2815 return treebuilder_done((TreeBuilderObject*) self->target);
2816 }
2817
2818 return res;
2819}
2820
2821static PyObject*
2822xmlparser_setevents(XMLParserObject* self, PyObject* args)
2823{
2824 /* activate element event reporting */
2825
Neal Norwitzc7074382006-06-12 02:06:17 +00002826 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002827 TreeBuilderObject* target;
2828
2829 PyObject* events; /* event collector */
2830 PyObject* event_set = Py_None;
2831 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2832 &event_set))
2833 return NULL;
2834
2835 if (!TreeBuilder_CheckExact(self->target)) {
2836 PyErr_SetString(
2837 PyExc_TypeError,
2838 "event handling only supported for cElementTree.Treebuilder "
2839 "targets"
2840 );
2841 return NULL;
2842 }
2843
2844 target = (TreeBuilderObject*) self->target;
2845
2846 Py_INCREF(events);
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002847 Py_XSETREF(target->events, events);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002848
2849 /* clear out existing events */
Serhiy Storchaka98a97222014-02-09 13:14:04 +02002850 Py_CLEAR(target->start_event_obj);
2851 Py_CLEAR(target->end_event_obj);
2852 Py_CLEAR(target->start_ns_event_obj);
2853 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002854
2855 if (event_set == Py_None) {
2856 /* default is "end" only */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002857 target->end_event_obj = PyString_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002858 Py_RETURN_NONE;
2859 }
2860
2861 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2862 goto error;
2863
2864 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2865 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2866 char* event;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002867 if (!PyString_Check(item))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002868 goto error;
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002869 Py_INCREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002870 event = PyString_AS_STRING(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002871 if (strcmp(event, "start") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002872 Py_XSETREF(target->start_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002873 } else if (strcmp(event, "end") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002874 Py_XSETREF(target->end_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002875 } else if (strcmp(event, "start-ns") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002876 Py_XSETREF(target->start_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002877 EXPAT(SetNamespaceDeclHandler)(
2878 self->parser,
2879 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2880 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2881 );
2882 } else if (strcmp(event, "end-ns") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002883 Py_XSETREF(target->end_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002884 EXPAT(SetNamespaceDeclHandler)(
2885 self->parser,
2886 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2887 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2888 );
2889 } else {
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002890 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002891 PyErr_Format(
2892 PyExc_ValueError,
2893 "unknown event '%s'", event
2894 );
2895 return NULL;
2896 }
2897 }
2898
2899 Py_RETURN_NONE;
2900
2901 error:
2902 PyErr_SetString(
2903 PyExc_TypeError,
2904 "invalid event tuple"
2905 );
2906 return NULL;
2907}
2908
2909static PyMethodDef xmlparser_methods[] = {
2910 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2911 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2912 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2913 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2914 {NULL, NULL}
2915};
2916
2917static PyObject*
2918xmlparser_getattr(XMLParserObject* self, char* name)
2919{
2920 PyObject* res;
2921
2922 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2923 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002924 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002925
2926 PyErr_Clear();
2927
2928 if (strcmp(name, "entity") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002929 res = self->entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002930 else if (strcmp(name, "target") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002931 res = self->target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002932 else if (strcmp(name, "version") == 0) {
2933 char buffer[100];
2934 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2935 XML_MINOR_VERSION, XML_MICRO_VERSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002936 return PyString_FromString(buffer);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002937 } else {
2938 PyErr_SetString(PyExc_AttributeError, name);
2939 return NULL;
2940 }
2941
2942 Py_INCREF(res);
2943 return res;
2944}
2945
2946statichere PyTypeObject XMLParser_Type = {
Benjamin Petersona72d15c2017-09-13 21:20:29 -07002947 PyVarObject_HEAD_INIT(NULL, 0)
2948 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002949 /* methods */
2950 (destructor)xmlparser_dealloc, /* tp_dealloc */
2951 0, /* tp_print */
2952 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2953};
2954
2955#endif
2956
2957/* ==================================================================== */
2958/* python module interface */
2959
2960static PyMethodDef _functions[] = {
2961 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2962 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2963 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2964#if defined(USE_EXPAT)
2965 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2966 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2967#endif
2968 {NULL, NULL}
2969};
2970
2971DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002972init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002973{
2974 PyObject* m;
2975 PyObject* g;
2976 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002977
2978 /* Patch object type */
Christian Heimese93237d2007-12-19 02:37:44 +00002979 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002980#if defined(USE_EXPAT)
Christian Heimese93237d2007-12-19 02:37:44 +00002981 Py_TYPE(&XMLParser_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002982#endif
2983
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002984 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002985 if (!m)
2986 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002987
2988 /* python glue code */
2989
2990 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002991 if (!g)
2992 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002993
2994 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2995
2996 bootstrap = (
2997
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002998 "from copy import copy, deepcopy\n"
2999
3000 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00003001 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003002 "except ImportError:\n"
3003 " import ElementTree\n"
3004 "ET = ElementTree\n"
3005 "del ElementTree\n"
3006
Fredrik Lundh6d52b552005-12-16 22:06:43 +00003007 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008
3009 "try:\n" /* check if copy works as is */
3010 " copy(cElementTree.Element('x'))\n"
3011 "except:\n"
3012 " def copyelement(elem):\n"
3013 " return elem\n"
3014
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003015 "class CommentProxy:\n"
3016 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003017 " element = cElementTree.Element(ET.Comment)\n"
3018 " element.text = text\n"
3019 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003020 " def __cmp__(self, other):\n"
3021 " return cmp(ET.Comment, other)\n"
3022 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003023
3024 "class ElementTree(ET.ElementTree):\n" /* public */
3025 " def parse(self, source, parser=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02003026 " close_source = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003027 " if not hasattr(source, 'read'):\n"
3028 " source = open(source, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02003029 " close_source = False\n"
3030 " try:\n"
3031 " if parser is not None:\n"
3032 " while 1:\n"
3033 " data = source.read(65536)\n"
3034 " if not data:\n"
3035 " break\n"
3036 " parser.feed(data)\n"
3037 " self._root = parser.close()\n"
3038 " else:\n"
3039 " parser = cElementTree.XMLParser()\n"
3040 " self._root = parser._parse(source)\n"
3041 " return self._root\n"
3042 " finally:\n"
3043 " if close_source:\n"
3044 " source.close()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003045 "cElementTree.ElementTree = ElementTree\n"
3046
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003047 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003048 " if tag == '*':\n"
3049 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003050 " if tag is None or node.tag == tag:\n"
3051 " yield node\n"
3052 " for node in node:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003053 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003054 " yield node\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003055
3056 "def itertext(node):\n" /* helper */
3057 " if node.text:\n"
3058 " yield node.text\n"
3059 " for e in node:\n"
3060 " for s in e.itertext():\n"
3061 " yield s\n"
3062 " if e.tail:\n"
3063 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003064
3065 "def parse(source, parser=None):\n" /* public */
3066 " tree = ElementTree()\n"
3067 " tree.parse(source, parser)\n"
3068 " return tree\n"
3069 "cElementTree.parse = parse\n"
3070
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003071 "class iterparse(object):\n"
3072 " root = None\n"
3073 " def __init__(self, file, events=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02003074 " self._close_file = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003075 " if not hasattr(file, 'read'):\n"
3076 " file = open(file, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02003077 " self._close_file = True\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003078 " self._file = file\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003079 " self._events = []\n"
3080 " self._index = 0\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003081 " self._error = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003082 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003083 " b = cElementTree.TreeBuilder()\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003084 " self._parser = cElementTree.XMLParser(b)\n"
3085 " self._parser._setevents(self._events, events)\n"
3086 " def next(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003087 " while 1:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003088 " try:\n"
3089 " item = self._events[self._index]\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003090 " self._index += 1\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003091 " return item\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003092 " except IndexError:\n"
3093 " pass\n"
3094 " if self._error:\n"
3095 " e = self._error\n"
3096 " self._error = None\n"
3097 " raise e\n"
3098 " if self._parser is None:\n"
3099 " self.root = self._root\n"
3100 " if self._close_file:\n"
3101 " self._file.close()\n"
3102 " raise StopIteration\n"
3103 " # load event buffer\n"
3104 " del self._events[:]\n"
3105 " self._index = 0\n"
3106 " data = self._file.read(16384)\n"
3107 " if data:\n"
3108 " try:\n"
3109 " self._parser.feed(data)\n"
3110 " except SyntaxError as exc:\n"
3111 " self._error = exc\n"
3112 " else:\n"
3113 " self._root = self._parser.close()\n"
3114 " self._parser = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003115 " def __iter__(self):\n"
3116 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003117 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003118
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003119 "class PIProxy:\n"
3120 " def __call__(self, target, text=None):\n"
3121 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003122 " element.text = target\n"
3123 " if text:\n"
3124 " element.text = element.text + ' ' + text\n"
3125 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003126 " def __cmp__(self, other):\n"
3127 " return cmp(ET.PI, other)\n"
3128 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003129
3130 "def XML(text):\n" /* public */
3131 " parser = cElementTree.XMLParser()\n"
3132 " parser.feed(text)\n"
3133 " return parser.close()\n"
3134 "cElementTree.XML = cElementTree.fromstring = XML\n"
3135
3136 "def XMLID(text):\n" /* public */
3137 " tree = XML(text)\n"
3138 " ids = {}\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003139 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003140 " id = elem.get('id')\n"
3141 " if id:\n"
3142 " ids[id] = elem\n"
3143 " return tree, ids\n"
3144 "cElementTree.XMLID = XMLID\n"
3145
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003146 "try:\n"
3147 " register_namespace = ET.register_namespace\n"
3148 "except AttributeError:\n"
3149 " def register_namespace(prefix, uri):\n"
3150 " ET._namespace_map[uri] = prefix\n"
3151 "cElementTree.register_namespace = register_namespace\n"
3152
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003153 "cElementTree.dump = ET.dump\n"
3154 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3155 "cElementTree.iselement = ET.iselement\n"
3156 "cElementTree.QName = ET.QName\n"
3157 "cElementTree.tostring = ET.tostring\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003158 "cElementTree.fromstringlist = ET.fromstringlist\n"
3159 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003160 "cElementTree.VERSION = '" VERSION "'\n"
3161 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003162
3163 );
3164
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003165 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3166 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003167
3168 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3169
3170 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3171 if (elementtree_copyelement_obj) {
3172 /* reduce hack needed; enable reduce method */
3173 PyMethodDef* mp;
3174 for (mp = element_methods; mp->ml_name; mp++)
3175 if (mp->ml_meth == (PyCFunction) element_reduce) {
3176 mp->ml_name = "__reduce__";
3177 break;
3178 }
3179 } else
3180 PyErr_Clear();
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003181
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003182 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003183 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3184 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003185
3186#if defined(USE_PYEXPAT_CAPI)
3187 /* link against pyexpat, if possible */
Larry Hastings402b73f2010-03-25 00:54:54 +00003188 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003189 if (expat_capi) {
3190 /* check that it's usable */
3191 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3192 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3193 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3194 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3195 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3196 expat_capi = NULL;
3197 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003198#endif
3199
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003200 elementtree_parseerror_obj = PyErr_NewException(
3201 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3202 );
3203 Py_INCREF(elementtree_parseerror_obj);
3204 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003205}