blob: 94dc5b7ca35e334a09625974d332a6f409a6e074 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xicluna3e8c1892010-03-11 14:36:19 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xicluna3e8c1892010-03-11 14:36:19 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xicluna3e8c1892010-03-11 14:36:19 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xicluna3e8c1892010-03-11 14:36:19 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
Fredrik Lundhdc075b92006-08-16 16:47:07 +000052#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000053
54/* -------------------------------------------------------------------- */
55/* configuration */
56
57/* Leave defined to include the expat-based XMLParser type */
58#define USE_EXPAT
59
Florent Xicluna3e8c1892010-03-11 14:36:19 +000060/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* #define USE_PYEXPAT_CAPI */
62
63/* An element can hold this many children without extra memory
64 allocations. */
65#define STATIC_CHILDREN 4
66
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
73 eight bytes. For the current version of cElementTree, this means
74 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
/* optional allocation tracing: change "#if 0" to "#if 1" to print a
   running byte total on every ALLOC/RELEASE; otherwise both macros
   expand to nothing */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) declares a file-private helper; MSVC
   builds additionally ask for inlining and the fastcall convention */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
96
/* compatibility shims for older interpreters */

/* Py_REFCNT/Py_TYPE accessors, supplied here for versions below 2.6 */
#if (PY_VERSION_HEX < 0x02060000)
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Py_ssize_t and lenfunc, supplied here for versions below 2.5 */
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

/* PyDict_CheckExact and Py_RETURN_NONE, supplied here for versions
   below 2.4 */
#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check

#if !defined(Py_RETURN_NONE)
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000115
/* The text and tail slots of an element are tagged pointers: the lowest
   bit of the stored pointer carries a 'join' flag.  Every use of text
   or tail as an object pointer must therefore go through JOIN_OBJ.  See
   the comments in the ElementObject definition for more info.

   JOIN_GET(p)        extract the flag bit (0 or 1)
   JOIN_SET(p, flag)  strip any existing flag from p and OR in `flag`
   JOIN_OBJ(p)        strip the flag, yielding the real PyObject pointer */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
123
124/* glue functions (see the init function for details) */
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000125static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000128static PyObject* elementtree_iter_obj;
129static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130static PyObject* elementpath_obj;
131
132/* helpers */
133
134LOCAL(PyObject*)
135deepcopy(PyObject* object, PyObject* memo)
136{
137 /* do a deep copy of the given object */
138
139 PyObject* args;
140 PyObject* result;
141
142 if (!elementtree_deepcopy_obj) {
143 PyErr_SetString(
144 PyExc_RuntimeError,
145 "deepcopy helper not found"
146 );
147 return NULL;
148 }
149
150 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000151 if (!args)
152 return NULL;
153
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
155 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156
157 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158
159 Py_DECREF(args);
160
161 return result;
162}
163
164LOCAL(PyObject*)
165list_join(PyObject* list)
166{
167 /* join list elements (destroying the list in the process) */
168
169 PyObject* joiner;
170 PyObject* function;
171 PyObject* args;
172 PyObject* result;
173
174 switch (PyList_GET_SIZE(list)) {
175 case 0:
176 Py_DECREF(list);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000177 return PyString_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000178 case 1:
179 result = PyList_GET_ITEM(list, 0);
180 Py_INCREF(result);
181 Py_DECREF(list);
182 return result;
183 }
184
185 /* two or more elements: slice out a suitable separator from the
186 first member, and use that to join the entire list */
187
188 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
189 if (!joiner)
190 return NULL;
191
192 function = PyObject_GetAttrString(joiner, "join");
193 if (!function) {
194 Py_DECREF(joiner);
195 return NULL;
196 }
197
198 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000199 if (!args)
200 return NULL;
201
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202 PyTuple_SET_ITEM(args, 0, list);
203
204 result = PyObject_CallObject(function, args);
205
206 Py_DECREF(args); /* also removes list */
207 Py_DECREF(function);
208 Py_DECREF(joiner);
209
210 return result;
211}
212
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000213/* -------------------------------------------------------------------- */
214/* the element type */
215
216typedef struct {
217
218 /* attributes (a dictionary object), or None if no attributes */
219 PyObject* attrib;
220
221 /* child elements */
222 int length; /* actual number of items */
223 int allocated; /* allocated items */
224
225 /* this either points to _children or to a malloced buffer */
226 PyObject* *children;
227
228 PyObject* _children[STATIC_CHILDREN];
229
230} ElementObjectExtra;
231
232typedef struct {
233 PyObject_HEAD
234
235 /* element tag (a string). */
236 PyObject* tag;
237
238 /* text before first child. note that this is a tagged pointer;
239 use JOIN_OBJ to get the object pointer. the join flag is used
240 to distinguish lists created by the tree builder from lists
241 assigned to the attribute by application code; the former
242 should be joined before being returned to the user, the latter
243 should be left intact. */
244 PyObject* text;
245
246 /* text after this element, in parent. note that this is a tagged
247 pointer; use JOIN_OBJ to get the object pointer. */
248 PyObject* tail;
249
250 ElementObjectExtra* extra;
251
252} ElementObject;
253
254staticforward PyTypeObject Element_Type;
255
Christian Heimese93237d2007-12-19 02:37:44 +0000256#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000257
258/* -------------------------------------------------------------------- */
259/* element constructor and destructor */
260
261LOCAL(int)
262element_new_extra(ElementObject* self, PyObject* attrib)
263{
264 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
265 if (!self->extra)
266 return -1;
267
268 if (!attrib)
269 attrib = Py_None;
270
271 Py_INCREF(attrib);
272 self->extra->attrib = attrib;
273
274 self->extra->length = 0;
275 self->extra->allocated = STATIC_CHILDREN;
276 self->extra->children = self->extra->_children;
277
278 return 0;
279}
280
281LOCAL(void)
282element_dealloc_extra(ElementObject* self)
283{
284 int i;
285
286 Py_DECREF(self->extra->attrib);
287
288 for (i = 0; i < self->extra->length; i++)
289 Py_DECREF(self->extra->children[i]);
290
291 if (self->extra->children != self->extra->_children)
292 PyObject_Free(self->extra->children);
293
294 PyObject_Free(self->extra);
295}
296
297LOCAL(PyObject*)
298element_new(PyObject* tag, PyObject* attrib)
299{
300 ElementObject* self;
301
302 self = PyObject_New(ElementObject, &Element_Type);
303 if (self == NULL)
304 return NULL;
305
306 /* use None for empty dictionaries */
307 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
308 attrib = Py_None;
309
310 self->extra = NULL;
311
312 if (attrib != Py_None) {
313
Neal Norwitzc6a989a2006-05-10 06:57:58 +0000314 if (element_new_extra(self, attrib) < 0) {
315 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000316 return NULL;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000317 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000318
319 self->extra->length = 0;
320 self->extra->allocated = STATIC_CHILDREN;
321 self->extra->children = self->extra->_children;
322
323 }
324
325 Py_INCREF(tag);
326 self->tag = tag;
327
328 Py_INCREF(Py_None);
329 self->text = Py_None;
330
331 Py_INCREF(Py_None);
332 self->tail = Py_None;
333
334 ALLOC(sizeof(ElementObject), "create element");
335
336 return (PyObject*) self;
337}
338
339LOCAL(int)
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200340element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000341{
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200342 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000343 PyObject* *children;
344
345 /* make sure self->children can hold the given number of extra
346 elements. set an exception and return -1 if allocation failed */
347
348 if (!self->extra)
349 element_new_extra(self, NULL);
350
351 size = self->extra->length + extra;
352
353 if (size > self->extra->allocated) {
354 /* use Python 2.4's list growth strategy */
355 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000356 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
357 * which needs at least 4 bytes.
358 * Although it's a false alarm always assume at least one child to
359 * be safe.
360 */
361 size = size ? size : 1;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200362 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
363 goto nomemory;
364 if (size > INT_MAX) {
365 PyErr_SetString(PyExc_OverflowError,
366 "too many children");
367 return -1;
368 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000369 if (self->extra->children != self->extra->_children) {
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000370 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
371 * "children", which needs at least 4 bytes. Although it's a
372 * false alarm always assume at least one child to be safe.
373 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000374 children = PyObject_Realloc(self->extra->children,
375 size * sizeof(PyObject*));
376 if (!children)
377 goto nomemory;
378 } else {
379 children = PyObject_Malloc(size * sizeof(PyObject*));
380 if (!children)
381 goto nomemory;
382 /* copy existing children from static area to malloc buffer */
383 memcpy(children, self->extra->children,
384 self->extra->length * sizeof(PyObject*));
385 }
386 self->extra->children = children;
387 self->extra->allocated = size;
388 }
389
390 return 0;
391
392 nomemory:
393 PyErr_NoMemory();
394 return -1;
395}
396
397LOCAL(int)
398element_add_subelement(ElementObject* self, PyObject* element)
399{
400 /* add a child element to a parent */
401
402 if (element_resize(self, 1) < 0)
403 return -1;
404
405 Py_INCREF(element);
406 self->extra->children[self->extra->length] = element;
407
408 self->extra->length++;
409
410 return 0;
411}
412
413LOCAL(PyObject*)
414element_get_attrib(ElementObject* self)
415{
416 /* return borrowed reference to attrib dictionary */
417 /* note: this function assumes that the extra section exists */
418
419 PyObject* res = self->extra->attrib;
420
421 if (res == Py_None) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000422 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000423 /* create missing dictionary */
424 res = PyDict_New();
425 if (!res)
426 return NULL;
427 self->extra->attrib = res;
428 }
429
430 return res;
431}
432
433LOCAL(PyObject*)
434element_get_text(ElementObject* self)
435{
436 /* return borrowed reference to text attribute */
437
438 PyObject* res = self->text;
439
440 if (JOIN_GET(res)) {
441 res = JOIN_OBJ(res);
442 if (PyList_CheckExact(res)) {
443 res = list_join(res);
444 if (!res)
445 return NULL;
446 self->text = res;
447 }
448 }
449
450 return res;
451}
452
453LOCAL(PyObject*)
454element_get_tail(ElementObject* self)
455{
456 /* return borrowed reference to text attribute */
457
458 PyObject* res = self->tail;
459
460 if (JOIN_GET(res)) {
461 res = JOIN_OBJ(res);
462 if (PyList_CheckExact(res)) {
463 res = list_join(res);
464 if (!res)
465 return NULL;
466 self->tail = res;
467 }
468 }
469
470 return res;
471}
472
473static PyObject*
474element(PyObject* self, PyObject* args, PyObject* kw)
475{
476 PyObject* elem;
477
478 PyObject* tag;
479 PyObject* attrib = NULL;
480 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
481 &PyDict_Type, &attrib))
482 return NULL;
483
484 if (attrib || kw) {
485 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
486 if (!attrib)
487 return NULL;
488 if (kw)
489 PyDict_Update(attrib, kw);
490 } else {
491 Py_INCREF(Py_None);
492 attrib = Py_None;
493 }
494
495 elem = element_new(tag, attrib);
496
497 Py_DECREF(attrib);
498
499 return elem;
500}
501
502static PyObject*
503subelement(PyObject* self, PyObject* args, PyObject* kw)
504{
505 PyObject* elem;
506
507 ElementObject* parent;
508 PyObject* tag;
509 PyObject* attrib = NULL;
510 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
511 &Element_Type, &parent, &tag,
512 &PyDict_Type, &attrib))
513 return NULL;
514
515 if (attrib || kw) {
516 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
517 if (!attrib)
518 return NULL;
519 if (kw)
520 PyDict_Update(attrib, kw);
521 } else {
522 Py_INCREF(Py_None);
523 attrib = Py_None;
524 }
525
526 elem = element_new(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000527 Py_DECREF(attrib);
Xiang Zhang9c0408d2017-03-22 14:32:52 +0800528 if (elem == NULL)
529 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000530
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000531 if (element_add_subelement(parent, elem) < 0) {
532 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000533 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000534 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000535
536 return elem;
537}
538
539static void
540element_dealloc(ElementObject* self)
541{
Serhiy Storchaka14518742016-12-28 09:23:17 +0200542 if (self->extra)
543 element_dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000544
545 /* discard attributes */
546 Py_DECREF(self->tag);
Serhiy Storchaka14518742016-12-28 09:23:17 +0200547 Py_DECREF(JOIN_OBJ(self->text));
548 Py_DECREF(JOIN_OBJ(self->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000549
550 RELEASE(sizeof(ElementObject), "destroy element");
551
552 PyObject_Del(self);
553}
554
555/* -------------------------------------------------------------------- */
556/* methods (in alphabetical order) */
557
558static PyObject*
559element_append(ElementObject* self, PyObject* args)
560{
561 PyObject* element;
562 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
563 return NULL;
564
565 if (element_add_subelement(self, element) < 0)
566 return NULL;
567
568 Py_RETURN_NONE;
569}
570
571static PyObject*
572element_clear(ElementObject* self, PyObject* args)
573{
574 if (!PyArg_ParseTuple(args, ":clear"))
575 return NULL;
576
577 if (self->extra) {
578 element_dealloc_extra(self);
579 self->extra = NULL;
580 }
581
582 Py_INCREF(Py_None);
583 Py_DECREF(JOIN_OBJ(self->text));
584 self->text = Py_None;
585
586 Py_INCREF(Py_None);
587 Py_DECREF(JOIN_OBJ(self->tail));
588 self->tail = Py_None;
589
590 Py_RETURN_NONE;
591}
592
593static PyObject*
594element_copy(ElementObject* self, PyObject* args)
595{
596 int i;
597 ElementObject* element;
598
599 if (!PyArg_ParseTuple(args, ":__copy__"))
600 return NULL;
601
602 element = (ElementObject*) element_new(
603 self->tag, (self->extra) ? self->extra->attrib : Py_None
604 );
605 if (!element)
606 return NULL;
607
608 Py_DECREF(JOIN_OBJ(element->text));
609 element->text = self->text;
610 Py_INCREF(JOIN_OBJ(element->text));
611
612 Py_DECREF(JOIN_OBJ(element->tail));
613 element->tail = self->tail;
614 Py_INCREF(JOIN_OBJ(element->tail));
615
616 if (self->extra) {
617
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000618 if (element_resize(element, self->extra->length) < 0) {
619 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000620 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000621 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000622
623 for (i = 0; i < self->extra->length; i++) {
624 Py_INCREF(self->extra->children[i]);
625 element->extra->children[i] = self->extra->children[i];
626 }
627
628 element->extra->length = self->extra->length;
629
630 }
631
632 return (PyObject*) element;
633}
634
635static PyObject*
636element_deepcopy(ElementObject* self, PyObject* args)
637{
638 int i;
639 ElementObject* element;
640 PyObject* tag;
641 PyObject* attrib;
642 PyObject* text;
643 PyObject* tail;
644 PyObject* id;
645
646 PyObject* memo;
647 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
648 return NULL;
649
650 tag = deepcopy(self->tag, memo);
651 if (!tag)
652 return NULL;
653
654 if (self->extra) {
655 attrib = deepcopy(self->extra->attrib, memo);
656 if (!attrib) {
657 Py_DECREF(tag);
658 return NULL;
659 }
660 } else {
661 Py_INCREF(Py_None);
662 attrib = Py_None;
663 }
664
665 element = (ElementObject*) element_new(tag, attrib);
666
667 Py_DECREF(tag);
668 Py_DECREF(attrib);
669
670 if (!element)
671 return NULL;
672
673 text = deepcopy(JOIN_OBJ(self->text), memo);
674 if (!text)
675 goto error;
676 Py_DECREF(element->text);
677 element->text = JOIN_SET(text, JOIN_GET(self->text));
678
679 tail = deepcopy(JOIN_OBJ(self->tail), memo);
680 if (!tail)
681 goto error;
682 Py_DECREF(element->tail);
683 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
684
685 if (self->extra) {
686
687 if (element_resize(element, self->extra->length) < 0)
688 goto error;
689
690 for (i = 0; i < self->extra->length; i++) {
691 PyObject* child = deepcopy(self->extra->children[i], memo);
692 if (!child) {
693 element->extra->length = i;
694 goto error;
695 }
696 element->extra->children[i] = child;
697 }
698
699 element->extra->length = self->extra->length;
700
701 }
702
703 /* add object to memo dictionary (so deepcopy won't visit it again) */
704 id = PyInt_FromLong((Py_uintptr_t) self);
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000705 if (!id)
706 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000707
708 i = PyDict_SetItem(memo, id, (PyObject*) element);
709
710 Py_DECREF(id);
711
712 if (i < 0)
713 goto error;
714
715 return (PyObject*) element;
716
717 error:
718 Py_DECREF(element);
719 return NULL;
720}
721
722LOCAL(int)
723checkpath(PyObject* tag)
724{
Neal Norwitzc7074382006-06-12 02:06:17 +0000725 Py_ssize_t i;
726 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000727
728 /* check if a tag contains an xpath character */
729
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000730#define PATHCHAR(ch) \
731 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000732
733#if defined(Py_USING_UNICODE)
734 if (PyUnicode_Check(tag)) {
735 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
736 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
737 if (p[i] == '{')
738 check = 0;
739 else if (p[i] == '}')
740 check = 1;
741 else if (check && PATHCHAR(p[i]))
742 return 1;
743 }
744 return 0;
745 }
746#endif
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000747 if (PyString_Check(tag)) {
748 char *p = PyString_AS_STRING(tag);
749 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 if (p[i] == '{')
751 check = 0;
752 else if (p[i] == '}')
753 check = 1;
754 else if (check && PATHCHAR(p[i]))
755 return 1;
756 }
757 return 0;
758 }
759
760 return 1; /* unknown type; might be path expression */
761}
762
763static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000764element_extend(ElementObject* self, PyObject* args)
765{
766 PyObject* seq;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300767 Py_ssize_t i;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000768
769 PyObject* seq_in;
770 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
771 return NULL;
772
773 seq = PySequence_Fast(seq_in, "");
774 if (!seq) {
775 PyErr_Format(
776 PyExc_TypeError,
777 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
778 );
779 return NULL;
780 }
781
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300782 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000783 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
784 if (element_add_subelement(self, element) < 0) {
785 Py_DECREF(seq);
786 return NULL;
787 }
788 }
789
790 Py_DECREF(seq);
791
792 Py_RETURN_NONE;
793}
794
795static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000796element_find(ElementObject* self, PyObject* args)
797{
798 int i;
799
800 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000801 PyObject* namespaces = Py_None;
802 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000803 return NULL;
804
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000805 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000806 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000807 elementpath_obj, "find", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000808 );
809
810 if (!self->extra)
811 Py_RETURN_NONE;
812
813 for (i = 0; i < self->extra->length; i++) {
814 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300815 int rc;
816 if (!Element_CheckExact(item))
817 continue;
818 Py_INCREF(item);
819 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
820 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000821 return item;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300822 Py_DECREF(item);
823 if (rc < 0 && PyErr_Occurred())
824 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000825 }
826
827 Py_RETURN_NONE;
828}
829
830static PyObject*
831element_findtext(ElementObject* self, PyObject* args)
832{
833 int i;
834
835 PyObject* tag;
836 PyObject* default_value = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000837 PyObject* namespaces = Py_None;
838 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000839 return NULL;
840
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000841 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000842 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000843 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000844 );
845
846 if (!self->extra) {
847 Py_INCREF(default_value);
848 return default_value;
849 }
850
851 for (i = 0; i < self->extra->length; i++) {
852 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300853 int rc;
854 if (!Element_CheckExact(item))
855 continue;
856 Py_INCREF(item);
857 rc = PyObject_Compare(item->tag, tag);
858 if (rc == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000859 PyObject* text = element_get_text(item);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300860 if (text == Py_None) {
861 Py_DECREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000862 return PyString_FromString("");
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300863 }
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000864 Py_XINCREF(text);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300865 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000866 return text;
867 }
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300868 Py_DECREF(item);
869 if (rc < 0 && PyErr_Occurred())
870 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000871 }
872
873 Py_INCREF(default_value);
874 return default_value;
875}
876
877static PyObject*
878element_findall(ElementObject* self, PyObject* args)
879{
880 int i;
881 PyObject* out;
882
883 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000884 PyObject* namespaces = Py_None;
885 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000886 return NULL;
887
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000888 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000889 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000890 elementpath_obj, "findall", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000891 );
892
893 out = PyList_New(0);
894 if (!out)
895 return NULL;
896
897 if (!self->extra)
898 return out;
899
900 for (i = 0; i < self->extra->length; i++) {
901 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300902 int rc;
903 if (!Element_CheckExact(item))
904 continue;
905 Py_INCREF(item);
906 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
907 if (rc == 0)
908 rc = PyList_Append(out, item);
909 Py_DECREF(item);
910 if (rc < 0 && PyErr_Occurred()) {
911 Py_DECREF(out);
912 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000913 }
914 }
915
916 return out;
917}
918
919static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000920element_iterfind(ElementObject* self, PyObject* args)
921{
922 PyObject* tag;
923 PyObject* namespaces = Py_None;
924 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
925 return NULL;
926
927 return PyObject_CallMethod(
928 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
929 );
930}
931
932static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000933element_get(ElementObject* self, PyObject* args)
934{
935 PyObject* value;
936
937 PyObject* key;
938 PyObject* default_value = Py_None;
939 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
940 return NULL;
941
942 if (!self->extra || self->extra->attrib == Py_None)
943 value = default_value;
944 else {
945 value = PyDict_GetItem(self->extra->attrib, key);
946 if (!value)
947 value = default_value;
948 }
949
950 Py_INCREF(value);
951 return value;
952}
953
954static PyObject*
955element_getchildren(ElementObject* self, PyObject* args)
956{
957 int i;
958 PyObject* list;
959
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000960 /* FIXME: report as deprecated? */
961
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000962 if (!PyArg_ParseTuple(args, ":getchildren"))
963 return NULL;
964
965 if (!self->extra)
966 return PyList_New(0);
967
968 list = PyList_New(self->extra->length);
969 if (!list)
970 return NULL;
971
972 for (i = 0; i < self->extra->length; i++) {
973 PyObject* item = self->extra->children[i];
974 Py_INCREF(item);
975 PyList_SET_ITEM(list, i, item);
976 }
977
978 return list;
979}
980
981static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000982element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000983{
984 PyObject* result;
985
986 PyObject* tag = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000987 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000988 return NULL;
989
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000990 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000991 PyErr_SetString(
992 PyExc_RuntimeError,
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000993 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000994 );
995 return NULL;
996 }
997
998 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000999 if (!args)
1000 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001001
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001002 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1003 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1004
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001005 result = PyObject_CallObject(elementtree_iter_obj, args);
1006
1007 Py_DECREF(args);
1008
1009 return result;
1010}
1011
1012
1013static PyObject*
1014element_itertext(ElementObject* self, PyObject* args)
1015{
1016 PyObject* result;
1017
1018 if (!PyArg_ParseTuple(args, ":itertext"))
1019 return NULL;
1020
1021 if (!elementtree_itertext_obj) {
1022 PyErr_SetString(
1023 PyExc_RuntimeError,
1024 "itertext helper not found"
1025 );
1026 return NULL;
1027 }
1028
1029 args = PyTuple_New(1);
1030 if (!args)
1031 return NULL;
1032
1033 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1034
1035 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001036
1037 Py_DECREF(args);
1038
1039 return result;
1040}
1041
1042static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001043element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001044{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001045 ElementObject* self = (ElementObject*) self_;
1046
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001047 if (!self->extra || index < 0 || index >= self->extra->length) {
1048 PyErr_SetString(
1049 PyExc_IndexError,
1050 "child index out of range"
1051 );
1052 return NULL;
1053 }
1054
1055 Py_INCREF(self->extra->children[index]);
1056 return self->extra->children[index];
1057}
1058
1059static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001060element_insert(ElementObject* self, PyObject* args)
1061{
1062 int i;
1063
1064 int index;
1065 PyObject* element;
1066 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1067 &Element_Type, &element))
1068 return NULL;
1069
1070 if (!self->extra)
1071 element_new_extra(self, NULL);
1072
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001073 if (index < 0) {
1074 index += self->extra->length;
1075 if (index < 0)
1076 index = 0;
1077 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001078 if (index > self->extra->length)
1079 index = self->extra->length;
1080
1081 if (element_resize(self, 1) < 0)
1082 return NULL;
1083
1084 for (i = self->extra->length; i > index; i--)
1085 self->extra->children[i] = self->extra->children[i-1];
1086
1087 Py_INCREF(element);
1088 self->extra->children[index] = element;
1089
1090 self->extra->length++;
1091
1092 Py_RETURN_NONE;
1093}
1094
1095static PyObject*
1096element_items(ElementObject* self, PyObject* args)
1097{
1098 if (!PyArg_ParseTuple(args, ":items"))
1099 return NULL;
1100
1101 if (!self->extra || self->extra->attrib == Py_None)
1102 return PyList_New(0);
1103
1104 return PyDict_Items(self->extra->attrib);
1105}
1106
1107static PyObject*
1108element_keys(ElementObject* self, PyObject* args)
1109{
1110 if (!PyArg_ParseTuple(args, ":keys"))
1111 return NULL;
1112
1113 if (!self->extra || self->extra->attrib == Py_None)
1114 return PyList_New(0);
1115
1116 return PyDict_Keys(self->extra->attrib);
1117}
1118
Martin v. Löwis18e16552006-02-15 17:27:45 +00001119static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001120element_length(ElementObject* self)
1121{
1122 if (!self->extra)
1123 return 0;
1124
1125 return self->extra->length;
1126}
1127
1128static PyObject*
1129element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1130{
1131 PyObject* elem;
1132
1133 PyObject* tag;
1134 PyObject* attrib;
1135 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1136 return NULL;
1137
1138 attrib = PyDict_Copy(attrib);
1139 if (!attrib)
1140 return NULL;
1141
1142 elem = element_new(tag, attrib);
1143
1144 Py_DECREF(attrib);
1145
1146 return elem;
1147}
1148
1149static PyObject*
1150element_reduce(ElementObject* self, PyObject* args)
1151{
1152 if (!PyArg_ParseTuple(args, ":__reduce__"))
1153 return NULL;
1154
1155 /* Hack alert: This method is used to work around a __copy__
1156 problem on certain 2.3 and 2.4 versions. To save time and
1157 simplify the code, we create the copy in here, and use a dummy
1158 copyelement helper to trick the copy module into doing the
1159 right thing. */
1160
1161 if (!elementtree_copyelement_obj) {
1162 PyErr_SetString(
1163 PyExc_RuntimeError,
1164 "copyelement helper not found"
1165 );
1166 return NULL;
1167 }
1168
1169 return Py_BuildValue(
1170 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1171 );
1172}
1173
1174static PyObject*
1175element_remove(ElementObject* self, PyObject* args)
1176{
1177 int i;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001178 int rc;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001179 PyObject* element;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001180 PyObject* found;
1181
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1183 return NULL;
1184
1185 if (!self->extra) {
1186 /* element has no children, so raise exception */
1187 PyErr_SetString(
1188 PyExc_ValueError,
1189 "list.remove(x): x not in list"
1190 );
1191 return NULL;
1192 }
1193
1194 for (i = 0; i < self->extra->length; i++) {
1195 if (self->extra->children[i] == element)
1196 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001197 rc = PyObject_Compare(self->extra->children[i], element);
1198 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001199 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001200 if (rc < 0 && PyErr_Occurred())
1201 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001202 }
1203
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001204 if (i >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001205 /* element is not in children, so raise exception */
1206 PyErr_SetString(
1207 PyExc_ValueError,
1208 "list.remove(x): x not in list"
1209 );
1210 return NULL;
1211 }
1212
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001213 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001214
1215 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001216 for (; i < self->extra->length; i++)
1217 self->extra->children[i] = self->extra->children[i+1];
1218
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001219 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001220 Py_RETURN_NONE;
1221}
1222
1223static PyObject*
1224element_repr(ElementObject* self)
1225{
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001226 int status;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001227
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001228 if (self->tag == NULL)
1229 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001230
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001231 status = Py_ReprEnter((PyObject *)self);
1232 if (status == 0) {
1233 PyObject *repr, *tag;
1234 tag = PyObject_Repr(self->tag);
1235 if (!tag)
1236 return NULL;
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001237
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001238 repr = PyString_FromFormat("<Element %s at %p>",
1239 PyString_AS_STRING(tag), self);
Benjamin Petersond7324bc2016-12-03 11:30:04 -08001240 Py_ReprLeave((PyObject *)self);
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001241 Py_DECREF(tag);
1242 return repr;
1243 }
1244 if (status > 0)
1245 PyErr_Format(PyExc_RuntimeError,
1246 "reentrant call inside %s.__repr__",
1247 Py_TYPE(self)->tp_name);
1248 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001249}
1250
1251static PyObject*
1252element_set(ElementObject* self, PyObject* args)
1253{
1254 PyObject* attrib;
1255
1256 PyObject* key;
1257 PyObject* value;
1258 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1259 return NULL;
1260
1261 if (!self->extra)
1262 element_new_extra(self, NULL);
1263
1264 attrib = element_get_attrib(self);
1265 if (!attrib)
1266 return NULL;
1267
1268 if (PyDict_SetItem(attrib, key, value) < 0)
1269 return NULL;
1270
1271 Py_RETURN_NONE;
1272}
1273
1274static int
Serhiy Storchakab5b76c32015-11-26 11:21:47 +02001275element_setitem(PyObject* self_, Py_ssize_t index_, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001276{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001277 ElementObject* self = (ElementObject*) self_;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001278 int i, index;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001279 PyObject* old;
1280
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001281 if (!self->extra || index_ < 0 || index_ >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282 PyErr_SetString(
1283 PyExc_IndexError,
1284 "child assignment index out of range");
1285 return -1;
1286 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001287 index = (int)index_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001288
1289 old = self->extra->children[index];
1290
1291 if (item) {
1292 Py_INCREF(item);
1293 self->extra->children[index] = item;
1294 } else {
1295 self->extra->length--;
1296 for (i = index; i < self->extra->length; i++)
1297 self->extra->children[i] = self->extra->children[i+1];
1298 }
1299
1300 Py_DECREF(old);
1301
1302 return 0;
1303}
1304
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001305static PyObject*
1306element_subscr(PyObject* self_, PyObject* item)
1307{
1308 ElementObject* self = (ElementObject*) self_;
1309
1310#if (PY_VERSION_HEX < 0x02050000)
1311 if (PyInt_Check(item) || PyLong_Check(item)) {
1312 long i = PyInt_AsLong(item);
1313#else
1314 if (PyIndex_Check(item)) {
1315 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1316#endif
1317
1318 if (i == -1 && PyErr_Occurred()) {
1319 return NULL;
1320 }
1321 if (i < 0 && self->extra)
1322 i += self->extra->length;
1323 return element_getitem(self_, i);
1324 }
1325 else if (PySlice_Check(item)) {
1326 Py_ssize_t start, stop, step, slicelen, cur, i;
1327 PyObject* list;
1328
1329 if (!self->extra)
1330 return PyList_New(0);
1331
1332 if (PySlice_GetIndicesEx((PySliceObject *)item,
1333 self->extra->length,
1334 &start, &stop, &step, &slicelen) < 0) {
1335 return NULL;
1336 }
1337
1338 if (slicelen <= 0)
1339 return PyList_New(0);
1340 else {
1341 list = PyList_New(slicelen);
1342 if (!list)
1343 return NULL;
1344
1345 for (cur = start, i = 0; i < slicelen;
1346 cur += step, i++) {
1347 PyObject* item = self->extra->children[cur];
1348 Py_INCREF(item);
1349 PyList_SET_ITEM(list, i, item);
1350 }
1351
1352 return list;
1353 }
1354 }
1355 else {
1356 PyErr_SetString(PyExc_TypeError,
1357 "element indices must be integers");
1358 return NULL;
1359 }
1360}
1361
1362static int
1363element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1364{
1365 ElementObject* self = (ElementObject*) self_;
1366
1367#if (PY_VERSION_HEX < 0x02050000)
1368 if (PyInt_Check(item) || PyLong_Check(item)) {
1369 long i = PyInt_AsLong(item);
1370#else
1371 if (PyIndex_Check(item)) {
1372 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1373#endif
1374
1375 if (i == -1 && PyErr_Occurred()) {
1376 return -1;
1377 }
1378 if (i < 0 && self->extra)
1379 i += self->extra->length;
1380 return element_setitem(self_, i, value);
1381 }
1382 else if (PySlice_Check(item)) {
1383 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1384
1385 PyObject* recycle = NULL;
1386 PyObject* seq = NULL;
1387
1388 if (!self->extra)
1389 element_new_extra(self, NULL);
1390
1391 if (PySlice_GetIndicesEx((PySliceObject *)item,
1392 self->extra->length,
1393 &start, &stop, &step, &slicelen) < 0) {
1394 return -1;
1395 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001396 assert(slicelen <= self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001397
1398 if (value == NULL)
1399 newlen = 0;
1400 else {
1401 seq = PySequence_Fast(value, "");
1402 if (!seq) {
1403 PyErr_Format(
1404 PyExc_TypeError,
1405 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1406 );
1407 return -1;
1408 }
1409 newlen = PySequence_Size(seq);
1410 }
1411
1412 if (step != 1 && newlen != slicelen)
1413 {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001414 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001415 PyErr_Format(PyExc_ValueError,
1416#if (PY_VERSION_HEX < 0x02050000)
1417 "attempt to assign sequence of size %d "
1418 "to extended slice of size %d",
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001419 (int)newlen, (int)slicelen
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001420#else
1421 "attempt to assign sequence of size %zd "
1422 "to extended slice of size %zd",
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001423 newlen, slicelen
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001424#endif
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001425 );
1426 return -1;
1427 }
1428
1429
1430 /* Resize before creating the recycle bin, to prevent refleaks. */
1431 if (newlen > slicelen) {
1432 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001433 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001434 return -1;
1435 }
1436 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001437 assert(newlen - slicelen <= INT_MAX - self->extra->length);
1438 assert(newlen - slicelen >= -self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001439
1440 if (slicelen > 0) {
1441 /* to avoid recursive calls to this method (via decref), move
1442 old items to the recycle bin here, and get rid of them when
1443 we're done modifying the element */
1444 recycle = PyList_New(slicelen);
1445 if (!recycle) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001446 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001447 return -1;
1448 }
1449 for (cur = start, i = 0; i < slicelen;
1450 cur += step, i++)
1451 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1452 }
1453
1454 if (newlen < slicelen) {
1455 /* delete slice */
1456 for (i = stop; i < self->extra->length; i++)
1457 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1458 } else if (newlen > slicelen) {
1459 /* insert slice */
1460 for (i = self->extra->length-1; i >= stop; i--)
1461 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1462 }
1463
1464 /* replace the slice */
1465 for (cur = start, i = 0; i < newlen;
1466 cur += step, i++) {
1467 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1468 Py_INCREF(element);
1469 self->extra->children[cur] = element;
1470 }
1471
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001472 self->extra->length += (int)(newlen - slicelen);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001473
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001474 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001475
1476 /* discard the recycle bin, and everything in it */
1477 Py_XDECREF(recycle);
1478
1479 return 0;
1480 }
1481 else {
1482 PyErr_SetString(PyExc_TypeError,
1483 "element indices must be integers");
1484 return -1;
1485 }
1486}
1487
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001488static PyMethodDef element_methods[] = {
1489
1490 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1491
1492 {"get", (PyCFunction) element_get, METH_VARARGS},
1493 {"set", (PyCFunction) element_set, METH_VARARGS},
1494
1495 {"find", (PyCFunction) element_find, METH_VARARGS},
1496 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1497 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1498
1499 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001500 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001501 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1502 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1503
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001504 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1505 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1506 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1507
1508 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001509 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1510
1511 {"items", (PyCFunction) element_items, METH_VARARGS},
1512 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1513
1514 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1515
1516 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1517 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1518
1519 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1520 C objects correctly, so we have to fake it using a __reduce__-
1521 based hack (see the element_reduce implementation above for
1522 details). */
1523
1524 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1525 using a runtime test to figure out if we need to fake things
1526 or now (see the init code below). The following entry is
1527 enabled only if the hack is needed. */
1528
1529 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1530
1531 {NULL, NULL}
1532};
1533
1534static PyObject*
1535element_getattr(ElementObject* self, char* name)
1536{
1537 PyObject* res;
1538
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001539 /* handle common attributes first */
1540 if (strcmp(name, "tag") == 0) {
1541 res = self->tag;
1542 Py_INCREF(res);
1543 return res;
1544 } else if (strcmp(name, "text") == 0) {
1545 res = element_get_text(self);
Xiang Zhang827c7832017-03-22 12:25:51 +08001546 Py_XINCREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001547 return res;
1548 }
1549
1550 /* methods */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001551 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1552 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001553 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001554
1555 PyErr_Clear();
1556
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001557 /* less common attributes */
1558 if (strcmp(name, "tail") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001559 res = element_get_tail(self);
1560 } else if (strcmp(name, "attrib") == 0) {
1561 if (!self->extra)
1562 element_new_extra(self, NULL);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001563 res = element_get_attrib(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001564 } else {
1565 PyErr_SetString(PyExc_AttributeError, name);
1566 return NULL;
1567 }
1568
1569 if (!res)
1570 return NULL;
1571
1572 Py_INCREF(res);
1573 return res;
1574}
1575
1576static int
1577element_setattr(ElementObject* self, const char* name, PyObject* value)
1578{
1579 if (value == NULL) {
1580 PyErr_SetString(
1581 PyExc_AttributeError,
1582 "can't delete element attributes"
1583 );
1584 return -1;
1585 }
1586
1587 if (strcmp(name, "tag") == 0) {
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001588 Py_INCREF(value);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001589 Py_SETREF(self->tag, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001590 } else if (strcmp(name, "text") == 0) {
1591 Py_DECREF(JOIN_OBJ(self->text));
1592 self->text = value;
1593 Py_INCREF(self->text);
1594 } else if (strcmp(name, "tail") == 0) {
1595 Py_DECREF(JOIN_OBJ(self->tail));
1596 self->tail = value;
1597 Py_INCREF(self->tail);
1598 } else if (strcmp(name, "attrib") == 0) {
1599 if (!self->extra)
1600 element_new_extra(self, NULL);
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001601 Py_INCREF(value);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001602 Py_SETREF(self->extra->attrib, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001603 } else {
1604 PyErr_SetString(PyExc_AttributeError, name);
1605 return -1;
1606 }
1607
1608 return 0;
1609}
1610
1611static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001612 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001613 0, /* sq_concat */
1614 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001615 element_getitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001616 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001617 element_setitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001618 0,
1619};
1620
1621static PyMappingMethods element_as_mapping = {
1622 (lenfunc) element_length,
1623 (binaryfunc) element_subscr,
1624 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001625};
1626
1627statichere PyTypeObject Element_Type = {
1628 PyObject_HEAD_INIT(NULL)
1629 0, "Element", sizeof(ElementObject), 0,
1630 /* methods */
1631 (destructor)element_dealloc, /* tp_dealloc */
1632 0, /* tp_print */
1633 (getattrfunc)element_getattr, /* tp_getattr */
1634 (setattrfunc)element_setattr, /* tp_setattr */
1635 0, /* tp_compare */
1636 (reprfunc)element_repr, /* tp_repr */
1637 0, /* tp_as_number */
1638 &element_as_sequence, /* tp_as_sequence */
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001639 &element_as_mapping, /* tp_as_mapping */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001640};
1641
1642/* ==================================================================== */
1643/* the tree builder type */
1644
1645typedef struct {
1646 PyObject_HEAD
1647
1648 PyObject* root; /* root node (first created node) */
1649
1650 ElementObject* this; /* current node */
1651 ElementObject* last; /* most recently created node */
1652
1653 PyObject* data; /* data collector (string or list), or NULL */
1654
1655 PyObject* stack; /* element stack */
Neal Norwitzc7074382006-06-12 02:06:17 +00001656 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001657
1658 /* element tracing */
1659 PyObject* events; /* list of events, or NULL if not collecting */
1660 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1661 PyObject* end_event_obj;
1662 PyObject* start_ns_event_obj;
1663 PyObject* end_ns_event_obj;
1664
1665} TreeBuilderObject;
1666
/* forward declaration; the type object itself is defined after the
   treebuilder methods */
staticforward PyTypeObject TreeBuilder_Type;

/* true when op's type is exactly TreeBuilder_Type (pointer comparison) */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001670
1671/* -------------------------------------------------------------------- */
1672/* constructor and destructor */
1673
1674LOCAL(PyObject*)
1675treebuilder_new(void)
1676{
1677 TreeBuilderObject* self;
1678
1679 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1680 if (self == NULL)
1681 return NULL;
1682
1683 self->root = NULL;
1684
1685 Py_INCREF(Py_None);
1686 self->this = (ElementObject*) Py_None;
1687
1688 Py_INCREF(Py_None);
1689 self->last = (ElementObject*) Py_None;
1690
1691 self->data = NULL;
1692
1693 self->stack = PyList_New(20);
1694 self->index = 0;
1695
1696 self->events = NULL;
1697 self->start_event_obj = self->end_event_obj = NULL;
1698 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1699
1700 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1701
1702 return (PyObject*) self;
1703}
1704
1705static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00001706treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001707{
1708 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1709 return NULL;
1710
1711 return treebuilder_new();
1712}
1713
1714static void
1715treebuilder_dealloc(TreeBuilderObject* self)
1716{
1717 Py_XDECREF(self->end_ns_event_obj);
1718 Py_XDECREF(self->start_ns_event_obj);
1719 Py_XDECREF(self->end_event_obj);
1720 Py_XDECREF(self->start_event_obj);
1721 Py_XDECREF(self->events);
1722 Py_DECREF(self->stack);
1723 Py_XDECREF(self->data);
1724 Py_DECREF(self->last);
1725 Py_DECREF(self->this);
1726 Py_XDECREF(self->root);
1727
1728 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1729
1730 PyObject_Del(self);
1731}
1732
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001733LOCAL(int)
1734treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
1735 PyObject *node)
1736{
1737 if (action != NULL) {
1738 PyObject *res = PyTuple_Pack(2, action, node);
1739 if (res == NULL)
1740 return -1;
1741 if (PyList_Append(self->events, res) < 0) {
1742 Py_DECREF(res);
1743 return -1;
1744 }
1745 Py_DECREF(res);
1746 }
1747 return 0;
1748}
1749
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001750/* -------------------------------------------------------------------- */
1751/* handlers */
1752
1753LOCAL(PyObject*)
1754treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1755 PyObject* standalone)
1756{
1757 Py_RETURN_NONE;
1758}
1759
1760LOCAL(PyObject*)
1761treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1762 PyObject* attrib)
1763{
1764 PyObject* node;
1765 PyObject* this;
1766
1767 if (self->data) {
1768 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001769 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001770 self->last->text = JOIN_SET(
1771 self->data, PyList_CheckExact(self->data)
1772 );
1773 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001774 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001775 self->last->tail = JOIN_SET(
1776 self->data, PyList_CheckExact(self->data)
1777 );
1778 }
1779 self->data = NULL;
1780 }
1781
1782 node = element_new(tag, attrib);
1783 if (!node)
1784 return NULL;
1785
1786 this = (PyObject*) self->this;
1787
1788 if (this != Py_None) {
1789 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001790 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001791 } else {
1792 if (self->root) {
1793 PyErr_SetString(
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001794 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001795 "multiple elements on top level"
1796 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001797 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001798 }
1799 Py_INCREF(node);
1800 self->root = node;
1801 }
1802
1803 if (self->index < PyList_GET_SIZE(self->stack)) {
1804 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001805 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001806 Py_INCREF(this);
1807 } else {
1808 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001809 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001810 }
1811 self->index++;
1812
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001813 Py_INCREF(node);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001814 Py_SETREF(self->this, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001815
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001816 Py_INCREF(node);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001817 Py_SETREF(self->last, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001818
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001819 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
1820 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001821
1822 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001823
1824 error:
1825 Py_DECREF(node);
1826 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001827}
1828
1829LOCAL(PyObject*)
1830treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1831{
1832 if (!self->data) {
Fredrik Lundhdc075b92006-08-16 16:47:07 +00001833 if (self->last == (ElementObject*) Py_None) {
1834 /* ignore calls to data before the first call to start */
1835 Py_RETURN_NONE;
1836 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001837 /* store the first item as is */
1838 Py_INCREF(data); self->data = data;
1839 } else {
1840 /* more than one item; use a list to collect items */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001841 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1842 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001843 /* expat often generates single character data sections; handle
1844 the most common case by resizing the existing string... */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001845 Py_ssize_t size = PyString_GET_SIZE(self->data);
1846 if (_PyString_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001847 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001848 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001849 } else if (PyList_CheckExact(self->data)) {
1850 if (PyList_Append(self->data, data) < 0)
1851 return NULL;
1852 } else {
1853 PyObject* list = PyList_New(2);
1854 if (!list)
1855 return NULL;
1856 PyList_SET_ITEM(list, 0, self->data);
1857 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1858 self->data = list;
1859 }
1860 }
1861
1862 Py_RETURN_NONE;
1863}
1864
1865LOCAL(PyObject*)
1866treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1867{
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001868 ElementObject *item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001869
1870 if (self->data) {
1871 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001872 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001873 self->last->text = JOIN_SET(
1874 self->data, PyList_CheckExact(self->data)
1875 );
1876 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001877 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001878 self->last->tail = JOIN_SET(
1879 self->data, PyList_CheckExact(self->data)
1880 );
1881 }
1882 self->data = NULL;
1883 }
1884
1885 if (self->index == 0) {
1886 PyErr_SetString(
1887 PyExc_IndexError,
1888 "pop from empty stack"
1889 );
1890 return NULL;
1891 }
1892
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001893 item = self->last;
1894 self->last = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001895 self->index--;
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001896 self->this = (ElementObject *) PyList_GET_ITEM(self->stack, self->index);
1897 Py_INCREF(self->this);
1898 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001899
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001900 if (treebuilder_append_event(self, self->end_event_obj, (PyObject*)self->last) < 0)
1901 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001902
1903 Py_INCREF(self->last);
1904 return (PyObject*) self->last;
1905}
1906
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001907/* -------------------------------------------------------------------- */
1908/* methods (in alphabetical order) */
1909
1910static PyObject*
1911treebuilder_data(TreeBuilderObject* self, PyObject* args)
1912{
1913 PyObject* data;
1914 if (!PyArg_ParseTuple(args, "O:data", &data))
1915 return NULL;
1916
1917 return treebuilder_handle_data(self, data);
1918}
1919
1920static PyObject*
1921treebuilder_end(TreeBuilderObject* self, PyObject* args)
1922{
1923 PyObject* tag;
1924 if (!PyArg_ParseTuple(args, "O:end", &tag))
1925 return NULL;
1926
1927 return treebuilder_handle_end(self, tag);
1928}
1929
1930LOCAL(PyObject*)
1931treebuilder_done(TreeBuilderObject* self)
1932{
1933 PyObject* res;
1934
1935 /* FIXME: check stack size? */
1936
1937 if (self->root)
1938 res = self->root;
1939 else
1940 res = Py_None;
1941
1942 Py_INCREF(res);
1943 return res;
1944}
1945
1946static PyObject*
1947treebuilder_close(TreeBuilderObject* self, PyObject* args)
1948{
1949 if (!PyArg_ParseTuple(args, ":close"))
1950 return NULL;
1951
1952 return treebuilder_done(self);
1953}
1954
1955static PyObject*
1956treebuilder_start(TreeBuilderObject* self, PyObject* args)
1957{
1958 PyObject* tag;
1959 PyObject* attrib = Py_None;
1960 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1961 return NULL;
1962
1963 return treebuilder_handle_start(self, tag, attrib);
1964}
1965
1966static PyObject*
1967treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1968{
1969 PyObject* encoding;
1970 PyObject* standalone;
1971 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1972 return NULL;
1973
1974 return treebuilder_handle_xml(self, encoding, standalone);
1975}
1976
1977static PyMethodDef treebuilder_methods[] = {
1978 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1979 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1980 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1981 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1982 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1983 {NULL, NULL}
1984};
1985
1986static PyObject*
1987treebuilder_getattr(TreeBuilderObject* self, char* name)
1988{
1989 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1990}
1991
/* Type object for TreeBuilder instances.  Only the leading slots are
   filled in; the remaining slots are left to default initialization.
   The head is initialized with NULL here and the type pointer is
   patched to PyType_Type in init_elementtree(). */
statichere PyTypeObject TreeBuilder_Type = {
    PyObject_HEAD_INIT(NULL)
    /* size field, type name, instance size, per-item size */
    0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
    /* methods */
    (destructor)treebuilder_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)treebuilder_getattr, /* tp_getattr */
};
2000
2001/* ==================================================================== */
2002/* the expat interface */
2003
2004#if defined(USE_EXPAT)
2005
2006#include "expat.h"
2007
/* EXPAT(func) names the expat entry point `func`.  When
   USE_PYEXPAT_CAPI is defined the call goes through the expat_capi
   function table (checked for NULL in xmlparser() before use);
   otherwise it expands to the XML_-prefixed symbol directly. */
#if defined(USE_PYEXPAT_CAPI)
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
2015
/* Instance layout of an XMLParser object.  All fields are set up in
   xmlparser() and released in xmlparser_dealloc(). */
typedef struct {
    PyObject_HEAD

    /* expat parser handle, created with ParserCreate_MM */
    XML_Parser parser;

    /* object receiving parse events; a new TreeBuilder if the caller
       supplied none */
    PyObject* target;
    /* dict consulted by the default handler to resolve "&name;" */
    PyObject* entity;

    /* dict used by makeuniversal() to cache raw expat names and the
       objects built for them */
    PyObject* names;

    /* The handle_* fields hold the result of looking up the attribute
       of the same name ("xml", "start", ...) on the target.  A field
       is NULL when the lookup failed. */
    PyObject* handle_xml;

    PyObject* handle_start;
    PyObject* handle_data;
    PyObject* handle_end;

    PyObject* handle_comment;
    PyObject* handle_pi;

    PyObject* handle_close;

} XMLParserObject;
2038
/* forward declaration; xmlparser() needs the type object before its
   definition, which follows the parser methods */
staticforward PyTypeObject XMLParser_Type;
2040
2041/* helpers */
2042
#if defined(Py_USING_UNICODE)
/* Scan the first `size` bytes of `string` and return 1 as soon as a
   byte with the high bit set is seen, i.e. the data is not plain 7-bit
   text.  Returns 0 when no such byte exists (including size <= 0). */
LOCAL(int)
checkstring(const char* string, int size)
{
    int pos = 0;

    while (pos < size) {
        if (string[pos] & 0x80)
            return 1;
        pos++;
    }

    return 0;
}
#endif
2057
2058LOCAL(PyObject*)
2059makestring(const char* string, int size)
2060{
2061 /* convert a UTF-8 string to either a 7-bit ascii string or a
2062 Unicode string */
2063
2064#if defined(Py_USING_UNICODE)
2065 if (checkstring(string, size))
2066 return PyUnicode_DecodeUTF8(string, size, "strict");
2067#endif
2068
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002069 return PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002070}
2071
2072LOCAL(PyObject*)
2073makeuniversal(XMLParserObject* self, const char* string)
2074{
2075 /* convert a UTF-8 tag/attribute name from the expat parser
2076 to a universal name string */
2077
2078 int size = strlen(string);
2079 PyObject* key;
2080 PyObject* value;
2081
2082 /* look the 'raw' name up in the names dictionary */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002083 key = PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002084 if (!key)
2085 return NULL;
2086
2087 value = PyDict_GetItem(self->names, key);
2088
2089 if (value) {
2090 Py_INCREF(value);
2091 } else {
2092 /* new name. convert to universal name, and decode as
2093 necessary */
2094
2095 PyObject* tag;
2096 char* p;
2097 int i;
2098
2099 /* look for namespace separator */
2100 for (i = 0; i < size; i++)
2101 if (string[i] == '}')
2102 break;
2103 if (i != size) {
2104 /* convert to universal name */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002105 tag = PyString_FromStringAndSize(NULL, size+1);
2106 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002107 p[0] = '{';
2108 memcpy(p+1, string, size);
2109 size++;
2110 } else {
2111 /* plain name; use key as tag */
2112 Py_INCREF(key);
2113 tag = key;
2114 }
2115
2116 /* decode universal name */
2117#if defined(Py_USING_UNICODE)
2118 /* inline makestring, to avoid duplicating the source string if
Martin Panter6a8163a2016-04-15 02:14:19 +00002119 it's not a utf-8 string */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002120 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002121 if (checkstring(p, size)) {
2122 value = PyUnicode_DecodeUTF8(p, size, "strict");
2123 Py_DECREF(tag);
2124 if (!value) {
2125 Py_DECREF(key);
2126 return NULL;
2127 }
2128 } else
2129#endif
2130 value = tag; /* use tag as is */
2131
2132 /* add to names dictionary */
2133 if (PyDict_SetItem(self->names, key, value) < 0) {
2134 Py_DECREF(key);
2135 Py_DECREF(value);
2136 return NULL;
2137 }
2138 }
2139
2140 Py_DECREF(key);
2141 return value;
2142}
2143
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002144static void
2145expat_set_error(const char* message, int line, int column)
2146{
2147 PyObject *error;
2148 PyObject *position;
2149 char buffer[256];
2150
2151 sprintf(buffer, "%s: line %d, column %d", message, line, column);
2152
2153 error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
2154 if (!error)
2155 return;
2156
2157 /* add position attribute */
2158 position = Py_BuildValue("(ii)", line, column);
2159 if (!position) {
2160 Py_DECREF(error);
2161 return;
2162 }
2163 if (PyObject_SetAttrString(error, "position", position) == -1) {
2164 Py_DECREF(error);
2165 Py_DECREF(position);
2166 return;
2167 }
2168 Py_DECREF(position);
2169
2170 PyErr_SetObject(elementtree_parseerror_obj, error);
2171 Py_DECREF(error);
2172}
2173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002174/* -------------------------------------------------------------------- */
2175/* handlers */
2176
2177static void
2178expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2179 int data_len)
2180{
2181 PyObject* key;
2182 PyObject* value;
2183 PyObject* res;
2184
2185 if (data_len < 2 || data_in[0] != '&')
2186 return;
2187
2188 key = makestring(data_in + 1, data_len - 2);
2189 if (!key)
2190 return;
2191
2192 value = PyDict_GetItem(self->entity, key);
2193
2194 if (value) {
2195 if (TreeBuilder_CheckExact(self->target))
2196 res = treebuilder_handle_data(
2197 (TreeBuilderObject*) self->target, value
2198 );
2199 else if (self->handle_data)
2200 res = PyObject_CallFunction(self->handle_data, "O", value);
2201 else
2202 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002203 Py_XDECREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002204 } else if (!PyErr_Occurred()) {
2205 /* Report the first error, not the last */
2206 char message[128];
2207 sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key));
2208 expat_set_error(
2209 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002210 EXPAT(GetErrorLineNumber)(self->parser),
2211 EXPAT(GetErrorColumnNumber)(self->parser)
2212 );
2213 }
2214
2215 Py_DECREF(key);
2216}
2217
2218static void
2219expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2220 const XML_Char **attrib_in)
2221{
2222 PyObject* res;
2223 PyObject* tag;
2224 PyObject* attrib;
2225 int ok;
2226
2227 /* tag name */
2228 tag = makeuniversal(self, tag_in);
2229 if (!tag)
2230 return; /* parser will look for errors */
2231
2232 /* attributes */
2233 if (attrib_in[0]) {
2234 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002235 if (!attrib) {
2236 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002237 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002238 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002239 while (attrib_in[0] && attrib_in[1]) {
2240 PyObject* key = makeuniversal(self, attrib_in[0]);
2241 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2242 if (!key || !value) {
2243 Py_XDECREF(value);
2244 Py_XDECREF(key);
2245 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002246 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002247 return;
2248 }
2249 ok = PyDict_SetItem(attrib, key, value);
2250 Py_DECREF(value);
2251 Py_DECREF(key);
2252 if (ok < 0) {
2253 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002254 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002255 return;
2256 }
2257 attrib_in += 2;
2258 }
2259 } else {
2260 Py_INCREF(Py_None);
2261 attrib = Py_None;
2262 }
2263
2264 if (TreeBuilder_CheckExact(self->target))
2265 /* shortcut */
2266 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2267 tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002268 else if (self->handle_start) {
2269 if (attrib == Py_None) {
2270 Py_DECREF(attrib);
2271 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002272 if (!attrib) {
2273 Py_DECREF(tag);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002274 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002275 }
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002276 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002277 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002278 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002279 res = NULL;
2280
2281 Py_DECREF(tag);
2282 Py_DECREF(attrib);
2283
2284 Py_XDECREF(res);
2285}
2286
2287static void
2288expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2289 int data_len)
2290{
2291 PyObject* data;
2292 PyObject* res;
2293
2294 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002295 if (!data)
2296 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002297
2298 if (TreeBuilder_CheckExact(self->target))
2299 /* shortcut */
2300 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2301 else if (self->handle_data)
2302 res = PyObject_CallFunction(self->handle_data, "O", data);
2303 else
2304 res = NULL;
2305
2306 Py_DECREF(data);
2307
2308 Py_XDECREF(res);
2309}
2310
2311static void
2312expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2313{
2314 PyObject* tag;
2315 PyObject* res = NULL;
2316
2317 if (TreeBuilder_CheckExact(self->target))
2318 /* shortcut */
2319 /* the standard tree builder doesn't look at the end tag */
2320 res = treebuilder_handle_end(
2321 (TreeBuilderObject*) self->target, Py_None
2322 );
2323 else if (self->handle_end) {
2324 tag = makeuniversal(self, tag_in);
2325 if (tag) {
2326 res = PyObject_CallFunction(self->handle_end, "O", tag);
2327 Py_DECREF(tag);
2328 }
2329 }
2330
2331 Py_XDECREF(res);
2332}
2333
2334static void
2335expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2336 const XML_Char *uri)
2337{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002338 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2339 PyObject *parcel;
2340 PyObject *sprefix = NULL;
2341 PyObject *suri = NULL;
2342
2343 if (PyErr_Occurred())
2344 return;
2345
2346 if (!target->events || !target->start_ns_event_obj)
2347 return;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002348
Eli Benderskyf933e082013-11-28 06:25:45 -08002349 if (uri)
Eli Bendersky71142c42013-11-28 06:37:25 -08002350 suri = makestring(uri, strlen(uri));
Eli Benderskyf933e082013-11-28 06:25:45 -08002351 else
Eli Bendersky71142c42013-11-28 06:37:25 -08002352 suri = PyString_FromStringAndSize("", 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002353 if (!suri)
2354 return;
2355
2356 if (prefix)
2357 sprefix = makestring(prefix, strlen(prefix));
2358 else
2359 sprefix = PyString_FromStringAndSize("", 0);
2360 if (!sprefix) {
2361 Py_DECREF(suri);
2362 return;
2363 }
2364
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002365 parcel = PyTuple_Pack(2, sprefix, suri);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002366 Py_DECREF(sprefix);
2367 Py_DECREF(suri);
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002368 if (!parcel)
2369 return;
2370 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
2371 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002372}
2373
2374static void
2375expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2376{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002377 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2378
2379 if (PyErr_Occurred())
2380 return;
2381
2382 if (!target->events)
2383 return;
2384
2385 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002386}
2387
2388static void
2389expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2390{
2391 PyObject* comment;
2392 PyObject* res;
2393
2394 if (self->handle_comment) {
2395 comment = makestring(comment_in, strlen(comment_in));
2396 if (comment) {
2397 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2398 Py_XDECREF(res);
2399 Py_DECREF(comment);
2400 }
2401 }
2402}
2403
2404static void
2405expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2406 const XML_Char* data_in)
2407{
2408 PyObject* target;
2409 PyObject* data;
2410 PyObject* res;
2411
2412 if (self->handle_pi) {
2413 target = makestring(target_in, strlen(target_in));
2414 data = makestring(data_in, strlen(data_in));
2415 if (target && data) {
2416 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2417 Py_XDECREF(res);
2418 Py_DECREF(data);
2419 Py_DECREF(target);
2420 } else {
2421 Py_XDECREF(data);
2422 Py_XDECREF(target);
2423 }
2424 }
2425}
2426
#if defined(Py_USING_UNICODE)
/* expat unknown-encoding callback.  Builds the byte-to-code-point map
   expat asks for by decoding the 256 byte values 0..255 with the codec
   called `name`.  Bytes that decode to the replacement character are
   marked -1 in the map.  An encoding that does not yield exactly 256
   characters is rejected with ValueError.  Returns XML_STATUS_OK or
   XML_STATUS_ERROR. */
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    unsigned char all_bytes[256];
    PyObject* decoded;
    Py_UNICODE* chars;
    int k;

    memset(info, 0, sizeof(XML_Encoding));

    for (k = 0; k < 256; k++)
        all_bytes[k] = k;

    decoded = PyUnicode_Decode((char*) all_bytes, 256, name, "replace");
    if (decoded == NULL)
        return XML_STATUS_ERROR;

    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        PyErr_SetString(PyExc_ValueError,
                        "multi-byte encodings are not supported");
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (k = 0; k < 256; k++) {
        if (chars[k] == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[k] = -1;
        else
            info->map[k] = chars[k];
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2467
2468/* -------------------------------------------------------------------- */
2469/* constructor and destructor */
2470
2471static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00002472xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002473{
2474 XMLParserObject* self;
2475 /* FIXME: does this need to be static? */
2476 static XML_Memory_Handling_Suite memory_handler;
2477
2478 PyObject* target = NULL;
2479 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002480 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002481 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2482 &target, &encoding))
2483 return NULL;
2484
2485#if defined(USE_PYEXPAT_CAPI)
2486 if (!expat_capi) {
2487 PyErr_SetString(
2488 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2489 );
2490 return NULL;
2491 }
2492#endif
2493
2494 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2495 if (self == NULL)
2496 return NULL;
2497
2498 self->entity = PyDict_New();
2499 if (!self->entity) {
2500 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002501 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002502 }
2503
2504 self->names = PyDict_New();
2505 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002506 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002507 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002508 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002509 }
2510
2511 memory_handler.malloc_fcn = PyObject_Malloc;
2512 memory_handler.realloc_fcn = PyObject_Realloc;
2513 memory_handler.free_fcn = PyObject_Free;
2514
2515 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2516 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002517 PyObject_Del(self->names);
2518 PyObject_Del(self->entity);
2519 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002520 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002521 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002522 }
2523
2524 /* setup target handlers */
2525 if (!target) {
2526 target = treebuilder_new();
2527 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002528 EXPAT(ParserFree)(self->parser);
2529 PyObject_Del(self->names);
2530 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002531 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002532 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002533 }
2534 } else
2535 Py_INCREF(target);
2536 self->target = target;
2537
2538 self->handle_xml = PyObject_GetAttrString(target, "xml");
2539 self->handle_start = PyObject_GetAttrString(target, "start");
2540 self->handle_data = PyObject_GetAttrString(target, "data");
2541 self->handle_end = PyObject_GetAttrString(target, "end");
2542 self->handle_comment = PyObject_GetAttrString(target, "comment");
2543 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002544 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002545
2546 PyErr_Clear();
2547
2548 /* configure parser */
2549 EXPAT(SetUserData)(self->parser, self);
2550 EXPAT(SetElementHandler)(
2551 self->parser,
2552 (XML_StartElementHandler) expat_start_handler,
2553 (XML_EndElementHandler) expat_end_handler
2554 );
2555 EXPAT(SetDefaultHandlerExpand)(
2556 self->parser,
2557 (XML_DefaultHandler) expat_default_handler
2558 );
2559 EXPAT(SetCharacterDataHandler)(
2560 self->parser,
2561 (XML_CharacterDataHandler) expat_data_handler
2562 );
2563 if (self->handle_comment)
2564 EXPAT(SetCommentHandler)(
2565 self->parser,
2566 (XML_CommentHandler) expat_comment_handler
2567 );
2568 if (self->handle_pi)
2569 EXPAT(SetProcessingInstructionHandler)(
2570 self->parser,
2571 (XML_ProcessingInstructionHandler) expat_pi_handler
2572 );
2573#if defined(Py_USING_UNICODE)
2574 EXPAT(SetUnknownEncodingHandler)(
2575 self->parser,
2576 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2577 );
2578#endif
2579
2580 ALLOC(sizeof(XMLParserObject), "create expatparser");
2581
2582 return (PyObject*) self;
2583}
2584
/* tp_dealloc slot for XMLParser: free the expat parser, drop every
   reference held by the object, and release the object's storage. */
static void
xmlparser_dealloc(XMLParserObject* self)
{
    EXPAT(ParserFree)(self->parser);

    /* the handler fields may be NULL (method missing on the target),
       hence Py_XDECREF */
    Py_XDECREF(self->handle_close);
    Py_XDECREF(self->handle_pi);
    Py_XDECREF(self->handle_comment);
    Py_XDECREF(self->handle_end);
    Py_XDECREF(self->handle_data);
    Py_XDECREF(self->handle_start);
    Py_XDECREF(self->handle_xml);

    /* these three are always set by a successful xmlparser() call */
    Py_DECREF(self->target);
    Py_DECREF(self->entity);
    Py_DECREF(self->names);

    RELEASE(sizeof(XMLParserObject), "destroy expatparser");

    PyObject_Del(self);
}
2606
2607/* -------------------------------------------------------------------- */
2608/* methods (in alphabetical order) */
2609
2610LOCAL(PyObject*)
2611expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2612{
2613 int ok;
2614
2615 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2616
2617 if (PyErr_Occurred())
2618 return NULL;
2619
2620 if (!ok) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002621 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002622 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2623 EXPAT(GetErrorLineNumber)(self->parser),
2624 EXPAT(GetErrorColumnNumber)(self->parser)
2625 );
2626 return NULL;
2627 }
2628
2629 Py_RETURN_NONE;
2630}
2631
2632static PyObject*
2633xmlparser_close(XMLParserObject* self, PyObject* args)
2634{
2635 /* end feeding data to parser */
2636
2637 PyObject* res;
2638 if (!PyArg_ParseTuple(args, ":close"))
2639 return NULL;
2640
2641 res = expat_parse(self, "", 0, 1);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002642 if (!res)
2643 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002644
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002645 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002646 Py_DECREF(res);
2647 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002648 } if (self->handle_close) {
2649 Py_DECREF(res);
2650 return PyObject_CallFunction(self->handle_close, "");
2651 } else
2652 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002653}
2654
2655static PyObject*
2656xmlparser_feed(XMLParserObject* self, PyObject* args)
2657{
2658 /* feed data to parser */
2659
2660 char* data;
2661 int data_len;
2662 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2663 return NULL;
2664
2665 return expat_parse(self, data, data_len, 0);
2666}
2667
2668static PyObject*
2669xmlparser_parse(XMLParserObject* self, PyObject* args)
2670{
2671 /* (internal) parse until end of input stream */
2672
2673 PyObject* reader;
2674 PyObject* buffer;
2675 PyObject* res;
2676
2677 PyObject* fileobj;
2678 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2679 return NULL;
2680
2681 reader = PyObject_GetAttrString(fileobj, "read");
2682 if (!reader)
2683 return NULL;
2684
2685 /* read from open file object */
2686 for (;;) {
2687
2688 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2689
2690 if (!buffer) {
2691 /* read failed (e.g. due to KeyboardInterrupt) */
2692 Py_DECREF(reader);
2693 return NULL;
2694 }
2695
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002696 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002697 Py_DECREF(buffer);
2698 break;
2699 }
2700
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002701 if (PyString_GET_SIZE(buffer) > INT_MAX) {
2702 Py_DECREF(buffer);
2703 Py_DECREF(reader);
2704 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
2705 return NULL;
2706 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707 res = expat_parse(
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002708 self, PyString_AS_STRING(buffer), (int)PyString_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002709 );
2710
2711 Py_DECREF(buffer);
2712
2713 if (!res) {
2714 Py_DECREF(reader);
2715 return NULL;
2716 }
2717 Py_DECREF(res);
2718
2719 }
2720
2721 Py_DECREF(reader);
2722
2723 res = expat_parse(self, "", 0, 1);
2724
2725 if (res && TreeBuilder_CheckExact(self->target)) {
2726 Py_DECREF(res);
2727 return treebuilder_done((TreeBuilderObject*) self->target);
2728 }
2729
2730 return res;
2731}
2732
2733static PyObject*
2734xmlparser_setevents(XMLParserObject* self, PyObject* args)
2735{
2736 /* activate element event reporting */
2737
Neal Norwitzc7074382006-06-12 02:06:17 +00002738 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739 TreeBuilderObject* target;
2740
2741 PyObject* events; /* event collector */
2742 PyObject* event_set = Py_None;
2743 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2744 &event_set))
2745 return NULL;
2746
2747 if (!TreeBuilder_CheckExact(self->target)) {
2748 PyErr_SetString(
2749 PyExc_TypeError,
2750 "event handling only supported for cElementTree.Treebuilder "
2751 "targets"
2752 );
2753 return NULL;
2754 }
2755
2756 target = (TreeBuilderObject*) self->target;
2757
2758 Py_INCREF(events);
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002759 Py_XSETREF(target->events, events);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002760
2761 /* clear out existing events */
Serhiy Storchaka98a97222014-02-09 13:14:04 +02002762 Py_CLEAR(target->start_event_obj);
2763 Py_CLEAR(target->end_event_obj);
2764 Py_CLEAR(target->start_ns_event_obj);
2765 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002766
2767 if (event_set == Py_None) {
2768 /* default is "end" only */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002769 target->end_event_obj = PyString_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002770 Py_RETURN_NONE;
2771 }
2772
2773 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2774 goto error;
2775
2776 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2777 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2778 char* event;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002779 if (!PyString_Check(item))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002780 goto error;
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002781 Py_INCREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002782 event = PyString_AS_STRING(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002783 if (strcmp(event, "start") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002784 Py_XSETREF(target->start_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002785 } else if (strcmp(event, "end") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002786 Py_XSETREF(target->end_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002787 } else if (strcmp(event, "start-ns") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002788 Py_XSETREF(target->start_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002789 EXPAT(SetNamespaceDeclHandler)(
2790 self->parser,
2791 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2792 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2793 );
2794 } else if (strcmp(event, "end-ns") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002795 Py_XSETREF(target->end_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002796 EXPAT(SetNamespaceDeclHandler)(
2797 self->parser,
2798 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2799 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2800 );
2801 } else {
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002802 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002803 PyErr_Format(
2804 PyExc_ValueError,
2805 "unknown event '%s'", event
2806 );
2807 return NULL;
2808 }
2809 }
2810
2811 Py_RETURN_NONE;
2812
2813 error:
2814 PyErr_SetString(
2815 PyExc_TypeError,
2816 "invalid event tuple"
2817 );
2818 return NULL;
2819}
2820
2821static PyMethodDef xmlparser_methods[] = {
2822 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2823 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2824 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2825 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2826 {NULL, NULL}
2827};
2828
2829static PyObject*
2830xmlparser_getattr(XMLParserObject* self, char* name)
2831{
2832 PyObject* res;
2833
2834 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2835 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002836 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002837
2838 PyErr_Clear();
2839
2840 if (strcmp(name, "entity") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002841 res = self->entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002842 else if (strcmp(name, "target") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002843 res = self->target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002844 else if (strcmp(name, "version") == 0) {
2845 char buffer[100];
2846 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2847 XML_MINOR_VERSION, XML_MICRO_VERSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002848 return PyString_FromString(buffer);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002849 } else {
2850 PyErr_SetString(PyExc_AttributeError, name);
2851 return NULL;
2852 }
2853
2854 Py_INCREF(res);
2855 return res;
2856}
2857
/* Type object for XMLParser instances.  Only the leading slots are
   filled in; the remaining slots are left to default initialization.
   The head is initialized with NULL here and the type pointer is
   patched to PyType_Type in init_elementtree(). */
statichere PyTypeObject XMLParser_Type = {
    PyObject_HEAD_INIT(NULL)
    /* size field, type name, instance size, per-item size */
    0, "XMLParser", sizeof(XMLParserObject), 0,
    /* methods */
    (destructor)xmlparser_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)xmlparser_getattr, /* tp_getattr */
};
2866
2867#endif
2868
2869/* ==================================================================== */
2870/* python module interface */
2871
2872static PyMethodDef _functions[] = {
2873 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2874 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2875 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2876#if defined(USE_EXPAT)
2877 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2878 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2879#endif
2880 {NULL, NULL}
2881};
2882
2883DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002884init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002885{
2886 PyObject* m;
2887 PyObject* g;
2888 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002889
2890 /* Patch object type */
Christian Heimese93237d2007-12-19 02:37:44 +00002891 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002892#if defined(USE_EXPAT)
Christian Heimese93237d2007-12-19 02:37:44 +00002893 Py_TYPE(&XMLParser_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002894#endif
2895
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002896 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002897 if (!m)
2898 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002899
2900 /* python glue code */
2901
2902 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002903 if (!g)
2904 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002905
2906 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2907
2908 bootstrap = (
2909
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002910 "from copy import copy, deepcopy\n"
2911
2912 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002913 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002914 "except ImportError:\n"
2915 " import ElementTree\n"
2916 "ET = ElementTree\n"
2917 "del ElementTree\n"
2918
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002919 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002920
2921 "try:\n" /* check if copy works as is */
2922 " copy(cElementTree.Element('x'))\n"
2923 "except:\n"
2924 " def copyelement(elem):\n"
2925 " return elem\n"
2926
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002927 "class CommentProxy:\n"
2928 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002929 " element = cElementTree.Element(ET.Comment)\n"
2930 " element.text = text\n"
2931 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002932 " def __cmp__(self, other):\n"
2933 " return cmp(ET.Comment, other)\n"
2934 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002935
2936 "class ElementTree(ET.ElementTree):\n" /* public */
2937 " def parse(self, source, parser=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002938 " close_source = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002939 " if not hasattr(source, 'read'):\n"
2940 " source = open(source, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002941 " close_source = False\n"
2942 " try:\n"
2943 " if parser is not None:\n"
2944 " while 1:\n"
2945 " data = source.read(65536)\n"
2946 " if not data:\n"
2947 " break\n"
2948 " parser.feed(data)\n"
2949 " self._root = parser.close()\n"
2950 " else:\n"
2951 " parser = cElementTree.XMLParser()\n"
2952 " self._root = parser._parse(source)\n"
2953 " return self._root\n"
2954 " finally:\n"
2955 " if close_source:\n"
2956 " source.close()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002957 "cElementTree.ElementTree = ElementTree\n"
2958
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002959 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002960 " if tag == '*':\n"
2961 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002962 " if tag is None or node.tag == tag:\n"
2963 " yield node\n"
2964 " for node in node:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002965 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002966 " yield node\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002967
2968 "def itertext(node):\n" /* helper */
2969 " if node.text:\n"
2970 " yield node.text\n"
2971 " for e in node:\n"
2972 " for s in e.itertext():\n"
2973 " yield s\n"
2974 " if e.tail:\n"
2975 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976
2977 "def parse(source, parser=None):\n" /* public */
2978 " tree = ElementTree()\n"
2979 " tree.parse(source, parser)\n"
2980 " return tree\n"
2981 "cElementTree.parse = parse\n"
2982
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002983 "class iterparse(object):\n"
2984 " root = None\n"
2985 " def __init__(self, file, events=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002986 " self._close_file = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002987 " if not hasattr(file, 'read'):\n"
2988 " file = open(file, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002989 " self._close_file = True\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002990 " self._file = file\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002991 " self._events = []\n"
2992 " self._index = 0\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01002993 " self._error = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002994 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002995 " b = cElementTree.TreeBuilder()\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002996 " self._parser = cElementTree.XMLParser(b)\n"
2997 " self._parser._setevents(self._events, events)\n"
2998 " def next(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002999 " while 1:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003000 " try:\n"
3001 " item = self._events[self._index]\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003002 " self._index += 1\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003003 " return item\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003004 " except IndexError:\n"
3005 " pass\n"
3006 " if self._error:\n"
3007 " e = self._error\n"
3008 " self._error = None\n"
3009 " raise e\n"
3010 " if self._parser is None:\n"
3011 " self.root = self._root\n"
3012 " if self._close_file:\n"
3013 " self._file.close()\n"
3014 " raise StopIteration\n"
3015 " # load event buffer\n"
3016 " del self._events[:]\n"
3017 " self._index = 0\n"
3018 " data = self._file.read(16384)\n"
3019 " if data:\n"
3020 " try:\n"
3021 " self._parser.feed(data)\n"
3022 " except SyntaxError as exc:\n"
3023 " self._error = exc\n"
3024 " else:\n"
3025 " self._root = self._parser.close()\n"
3026 " self._parser = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003027 " def __iter__(self):\n"
3028 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003029 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003030
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003031 "class PIProxy:\n"
3032 " def __call__(self, target, text=None):\n"
3033 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003034 " element.text = target\n"
3035 " if text:\n"
3036 " element.text = element.text + ' ' + text\n"
3037 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003038 " def __cmp__(self, other):\n"
3039 " return cmp(ET.PI, other)\n"
3040 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003041
3042 "def XML(text):\n" /* public */
3043 " parser = cElementTree.XMLParser()\n"
3044 " parser.feed(text)\n"
3045 " return parser.close()\n"
3046 "cElementTree.XML = cElementTree.fromstring = XML\n"
3047
3048 "def XMLID(text):\n" /* public */
3049 " tree = XML(text)\n"
3050 " ids = {}\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003051 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003052 " id = elem.get('id')\n"
3053 " if id:\n"
3054 " ids[id] = elem\n"
3055 " return tree, ids\n"
3056 "cElementTree.XMLID = XMLID\n"
3057
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003058 "try:\n"
3059 " register_namespace = ET.register_namespace\n"
3060 "except AttributeError:\n"
3061 " def register_namespace(prefix, uri):\n"
3062 " ET._namespace_map[uri] = prefix\n"
3063 "cElementTree.register_namespace = register_namespace\n"
3064
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003065 "cElementTree.dump = ET.dump\n"
3066 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3067 "cElementTree.iselement = ET.iselement\n"
3068 "cElementTree.QName = ET.QName\n"
3069 "cElementTree.tostring = ET.tostring\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003070 "cElementTree.fromstringlist = ET.fromstringlist\n"
3071 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003072 "cElementTree.VERSION = '" VERSION "'\n"
3073 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003074
3075 );
3076
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003077 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3078 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079
3080 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3081
3082 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3083 if (elementtree_copyelement_obj) {
3084 /* reduce hack needed; enable reduce method */
3085 PyMethodDef* mp;
3086 for (mp = element_methods; mp->ml_name; mp++)
3087 if (mp->ml_meth == (PyCFunction) element_reduce) {
3088 mp->ml_name = "__reduce__";
3089 break;
3090 }
3091 } else
3092 PyErr_Clear();
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003093
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003094 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003095 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3096 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003097
3098#if defined(USE_PYEXPAT_CAPI)
3099 /* link against pyexpat, if possible */
Larry Hastings402b73f2010-03-25 00:54:54 +00003100 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003101 if (expat_capi) {
3102 /* check that it's usable */
3103 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3104 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3105 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3106 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3107 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3108 expat_capi = NULL;
3109 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003110#endif
3111
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003112 elementtree_parseerror_obj = PyErr_NewException(
3113 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3114 );
3115 Py_INCREF(elementtree_parseerror_obj);
3116 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003117}