blob: 8e5b5877030afa70aa606830e175d792769b5cc9 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xicluna3e8c1892010-03-11 14:36:19 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xicluna3e8c1892010-03-11 14:36:19 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xicluna3e8c1892010-03-11 14:36:19 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xicluna3e8c1892010-03-11 14:36:19 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
Fredrik Lundhdc075b92006-08-16 16:47:07 +000052#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000053
54/* -------------------------------------------------------------------- */
55/* configuration */
56
57/* Leave defined to include the expat-based XMLParser type */
58#define USE_EXPAT
59
Florent Xicluna3e8c1892010-03-11 14:36:19 +000060/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* #define USE_PYEXPAT_CAPI */
62
63/* An element can hold this many children without extra memory
64 allocations. */
65#define STATIC_CHILDREN 4
66
67/* For best performance, choose a value so that 80-90% of all nodes
68 have no more than the given number of children. Set this to zero
69 to minimize the size of the element structure itself (this only
70 helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
73 eight bytes. For the current version of cElementTree, this means
74 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
#if 0
/* debugging aid (disabled): keep a running byte count in "memory" and
   print it, together with the given comment, on every ALLOC/RELEASE */
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
/* accounting disabled: both hooks expand to nothing */
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) declares a file-local helper returning
   "type"; under MSVC it additionally requests inlining and __fastcall */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
96
/* compatibility macros */

/* supply Py_REFCNT/Py_TYPE when building against headers older than 2.6 */
#if (PY_VERSION_HEX < 0x02060000)
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* headers older than 2.5: map Py_ssize_t to int and lenfunc to inquiry */
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

/* headers older than 2.4: fall back to PyDict_Check, and define
   Py_RETURN_NONE unless something else already did */
#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check

#if !defined(Py_RETURN_NONE)
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000115
/* macros used to store a 'join' flag in the low bit of a string object
   pointer.  every use of text and tail as an object pointer must go
   through JOIN_OBJ.  see comments in the ElementObject definition for
   more info.

   JOIN_GET(p)        -- the flag bit (0 or 1) carried by p
   JOIN_SET(p, flag)  -- p with its flag bit replaced by flag
   JOIN_OBJ(p)        -- p with the flag bit stripped, as a PyObject* */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t) 1))
123
Serhiy Storchaka85add472016-12-21 12:55:28 +0200124/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
125 * reference since this function sets it to NULL.
126*/
127static void _clear_joined_ptr(PyObject **p)
128{
129 if (*p) {
130 PyObject *tmp = JOIN_OBJ(*p);
131 *p = NULL;
132 Py_DECREF(tmp);
133 }
134}
135
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136/* glue functions (see the init function for details) */
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000137static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138static PyObject* elementtree_copyelement_obj;
139static PyObject* elementtree_deepcopy_obj;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000140static PyObject* elementtree_iter_obj;
141static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142static PyObject* elementpath_obj;
143
144/* helpers */
145
146LOCAL(PyObject*)
147deepcopy(PyObject* object, PyObject* memo)
148{
149 /* do a deep copy of the given object */
150
151 PyObject* args;
152 PyObject* result;
153
154 if (!elementtree_deepcopy_obj) {
155 PyErr_SetString(
156 PyExc_RuntimeError,
157 "deepcopy helper not found"
158 );
159 return NULL;
160 }
161
162 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000163 if (!args)
164 return NULL;
165
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000166 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
167 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
168
169 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
170
171 Py_DECREF(args);
172
173 return result;
174}
175
176LOCAL(PyObject*)
177list_join(PyObject* list)
178{
179 /* join list elements (destroying the list in the process) */
180
181 PyObject* joiner;
182 PyObject* function;
183 PyObject* args;
184 PyObject* result;
185
186 switch (PyList_GET_SIZE(list)) {
187 case 0:
188 Py_DECREF(list);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000189 return PyString_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000190 case 1:
191 result = PyList_GET_ITEM(list, 0);
192 Py_INCREF(result);
193 Py_DECREF(list);
194 return result;
195 }
196
197 /* two or more elements: slice out a suitable separator from the
198 first member, and use that to join the entire list */
199
200 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
201 if (!joiner)
202 return NULL;
203
204 function = PyObject_GetAttrString(joiner, "join");
205 if (!function) {
206 Py_DECREF(joiner);
207 return NULL;
208 }
209
210 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000211 if (!args)
212 return NULL;
213
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000214 PyTuple_SET_ITEM(args, 0, list);
215
216 result = PyObject_CallObject(function, args);
217
218 Py_DECREF(args); /* also removes list */
219 Py_DECREF(function);
220 Py_DECREF(joiner);
221
222 return result;
223}
224
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000225/* -------------------------------------------------------------------- */
226/* the element type */
227
228typedef struct {
229
230 /* attributes (a dictionary object), or None if no attributes */
231 PyObject* attrib;
232
233 /* child elements */
234 int length; /* actual number of items */
235 int allocated; /* allocated items */
236
237 /* this either points to _children or to a malloced buffer */
238 PyObject* *children;
239
240 PyObject* _children[STATIC_CHILDREN];
241
242} ElementObjectExtra;
243
244typedef struct {
245 PyObject_HEAD
246
247 /* element tag (a string). */
248 PyObject* tag;
249
250 /* text before first child. note that this is a tagged pointer;
251 use JOIN_OBJ to get the object pointer. the join flag is used
252 to distinguish lists created by the tree builder from lists
253 assigned to the attribute by application code; the former
254 should be joined before being returned to the user, the latter
255 should be left intact. */
256 PyObject* text;
257
258 /* text after this element, in parent. note that this is a tagged
259 pointer; use JOIN_OBJ to get the object pointer. */
260 PyObject* tail;
261
262 ElementObjectExtra* extra;
263
264} ElementObject;
265
266staticforward PyTypeObject Element_Type;
267
Christian Heimese93237d2007-12-19 02:37:44 +0000268#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000269
270/* -------------------------------------------------------------------- */
271/* element constructor and destructor */
272
273LOCAL(int)
274element_new_extra(ElementObject* self, PyObject* attrib)
275{
276 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
277 if (!self->extra)
278 return -1;
279
280 if (!attrib)
281 attrib = Py_None;
282
283 Py_INCREF(attrib);
284 self->extra->attrib = attrib;
285
286 self->extra->length = 0;
287 self->extra->allocated = STATIC_CHILDREN;
288 self->extra->children = self->extra->_children;
289
290 return 0;
291}
292
293LOCAL(void)
294element_dealloc_extra(ElementObject* self)
295{
296 int i;
297
298 Py_DECREF(self->extra->attrib);
299
300 for (i = 0; i < self->extra->length; i++)
301 Py_DECREF(self->extra->children[i]);
302
303 if (self->extra->children != self->extra->_children)
304 PyObject_Free(self->extra->children);
305
306 PyObject_Free(self->extra);
307}
308
309LOCAL(PyObject*)
310element_new(PyObject* tag, PyObject* attrib)
311{
312 ElementObject* self;
313
314 self = PyObject_New(ElementObject, &Element_Type);
315 if (self == NULL)
316 return NULL;
317
318 /* use None for empty dictionaries */
319 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
320 attrib = Py_None;
321
322 self->extra = NULL;
323
324 if (attrib != Py_None) {
325
Neal Norwitzc6a989a2006-05-10 06:57:58 +0000326 if (element_new_extra(self, attrib) < 0) {
327 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000328 return NULL;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000329 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000330
331 self->extra->length = 0;
332 self->extra->allocated = STATIC_CHILDREN;
333 self->extra->children = self->extra->_children;
334
335 }
336
337 Py_INCREF(tag);
338 self->tag = tag;
339
340 Py_INCREF(Py_None);
341 self->text = Py_None;
342
343 Py_INCREF(Py_None);
344 self->tail = Py_None;
345
346 ALLOC(sizeof(ElementObject), "create element");
347
348 return (PyObject*) self;
349}
350
351LOCAL(int)
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200352element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000353{
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200354 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000355 PyObject* *children;
356
357 /* make sure self->children can hold the given number of extra
358 elements. set an exception and return -1 if allocation failed */
359
360 if (!self->extra)
361 element_new_extra(self, NULL);
362
363 size = self->extra->length + extra;
364
365 if (size > self->extra->allocated) {
366 /* use Python 2.4's list growth strategy */
367 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000368 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
369 * which needs at least 4 bytes.
370 * Although it's a false alarm always assume at least one child to
371 * be safe.
372 */
373 size = size ? size : 1;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +0200374 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
375 goto nomemory;
376 if (size > INT_MAX) {
377 PyErr_SetString(PyExc_OverflowError,
378 "too many children");
379 return -1;
380 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000381 if (self->extra->children != self->extra->_children) {
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000382 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
383 * "children", which needs at least 4 bytes. Although it's a
384 * false alarm always assume at least one child to be safe.
385 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000386 children = PyObject_Realloc(self->extra->children,
387 size * sizeof(PyObject*));
388 if (!children)
389 goto nomemory;
390 } else {
391 children = PyObject_Malloc(size * sizeof(PyObject*));
392 if (!children)
393 goto nomemory;
394 /* copy existing children from static area to malloc buffer */
395 memcpy(children, self->extra->children,
396 self->extra->length * sizeof(PyObject*));
397 }
398 self->extra->children = children;
399 self->extra->allocated = size;
400 }
401
402 return 0;
403
404 nomemory:
405 PyErr_NoMemory();
406 return -1;
407}
408
409LOCAL(int)
410element_add_subelement(ElementObject* self, PyObject* element)
411{
412 /* add a child element to a parent */
413
414 if (element_resize(self, 1) < 0)
415 return -1;
416
417 Py_INCREF(element);
418 self->extra->children[self->extra->length] = element;
419
420 self->extra->length++;
421
422 return 0;
423}
424
425LOCAL(PyObject*)
426element_get_attrib(ElementObject* self)
427{
428 /* return borrowed reference to attrib dictionary */
429 /* note: this function assumes that the extra section exists */
430
431 PyObject* res = self->extra->attrib;
432
433 if (res == Py_None) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000434 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000435 /* create missing dictionary */
436 res = PyDict_New();
437 if (!res)
438 return NULL;
439 self->extra->attrib = res;
440 }
441
442 return res;
443}
444
445LOCAL(PyObject*)
446element_get_text(ElementObject* self)
447{
448 /* return borrowed reference to text attribute */
449
450 PyObject* res = self->text;
451
452 if (JOIN_GET(res)) {
453 res = JOIN_OBJ(res);
454 if (PyList_CheckExact(res)) {
455 res = list_join(res);
456 if (!res)
457 return NULL;
458 self->text = res;
459 }
460 }
461
462 return res;
463}
464
465LOCAL(PyObject*)
466element_get_tail(ElementObject* self)
467{
468 /* return borrowed reference to text attribute */
469
470 PyObject* res = self->tail;
471
472 if (JOIN_GET(res)) {
473 res = JOIN_OBJ(res);
474 if (PyList_CheckExact(res)) {
475 res = list_join(res);
476 if (!res)
477 return NULL;
478 self->tail = res;
479 }
480 }
481
482 return res;
483}
484
485static PyObject*
486element(PyObject* self, PyObject* args, PyObject* kw)
487{
488 PyObject* elem;
489
490 PyObject* tag;
491 PyObject* attrib = NULL;
492 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
493 &PyDict_Type, &attrib))
494 return NULL;
495
496 if (attrib || kw) {
497 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
498 if (!attrib)
499 return NULL;
500 if (kw)
501 PyDict_Update(attrib, kw);
502 } else {
503 Py_INCREF(Py_None);
504 attrib = Py_None;
505 }
506
507 elem = element_new(tag, attrib);
508
509 Py_DECREF(attrib);
510
511 return elem;
512}
513
514static PyObject*
515subelement(PyObject* self, PyObject* args, PyObject* kw)
516{
517 PyObject* elem;
518
519 ElementObject* parent;
520 PyObject* tag;
521 PyObject* attrib = NULL;
522 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
523 &Element_Type, &parent, &tag,
524 &PyDict_Type, &attrib))
525 return NULL;
526
527 if (attrib || kw) {
528 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
529 if (!attrib)
530 return NULL;
531 if (kw)
532 PyDict_Update(attrib, kw);
533 } else {
534 Py_INCREF(Py_None);
535 attrib = Py_None;
536 }
537
538 elem = element_new(tag, attrib);
539
540 Py_DECREF(attrib);
541
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000542 if (element_add_subelement(parent, elem) < 0) {
543 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000544 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000545 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000546
547 return elem;
548}
549
550static void
551element_dealloc(ElementObject* self)
552{
Serhiy Storchaka85add472016-12-21 12:55:28 +0200553 Py_TRASHCAN_SAFE_BEGIN(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000554
555 /* discard attributes */
556 Py_DECREF(self->tag);
Serhiy Storchaka85add472016-12-21 12:55:28 +0200557 _clear_joined_ptr(&self->text);
558 _clear_joined_ptr(&self->tail);
559
560 if (self->extra)
561 element_dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000562
563 RELEASE(sizeof(ElementObject), "destroy element");
564
565 PyObject_Del(self);
Serhiy Storchaka85add472016-12-21 12:55:28 +0200566 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000567}
568
569/* -------------------------------------------------------------------- */
570/* methods (in alphabetical order) */
571
572static PyObject*
573element_append(ElementObject* self, PyObject* args)
574{
575 PyObject* element;
576 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
577 return NULL;
578
579 if (element_add_subelement(self, element) < 0)
580 return NULL;
581
582 Py_RETURN_NONE;
583}
584
585static PyObject*
586element_clear(ElementObject* self, PyObject* args)
587{
588 if (!PyArg_ParseTuple(args, ":clear"))
589 return NULL;
590
591 if (self->extra) {
592 element_dealloc_extra(self);
593 self->extra = NULL;
594 }
595
596 Py_INCREF(Py_None);
597 Py_DECREF(JOIN_OBJ(self->text));
598 self->text = Py_None;
599
600 Py_INCREF(Py_None);
601 Py_DECREF(JOIN_OBJ(self->tail));
602 self->tail = Py_None;
603
604 Py_RETURN_NONE;
605}
606
607static PyObject*
608element_copy(ElementObject* self, PyObject* args)
609{
610 int i;
611 ElementObject* element;
612
613 if (!PyArg_ParseTuple(args, ":__copy__"))
614 return NULL;
615
616 element = (ElementObject*) element_new(
617 self->tag, (self->extra) ? self->extra->attrib : Py_None
618 );
619 if (!element)
620 return NULL;
621
622 Py_DECREF(JOIN_OBJ(element->text));
623 element->text = self->text;
624 Py_INCREF(JOIN_OBJ(element->text));
625
626 Py_DECREF(JOIN_OBJ(element->tail));
627 element->tail = self->tail;
628 Py_INCREF(JOIN_OBJ(element->tail));
629
630 if (self->extra) {
631
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000632 if (element_resize(element, self->extra->length) < 0) {
633 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000634 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000635 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000636
637 for (i = 0; i < self->extra->length; i++) {
638 Py_INCREF(self->extra->children[i]);
639 element->extra->children[i] = self->extra->children[i];
640 }
641
642 element->extra->length = self->extra->length;
643
644 }
645
646 return (PyObject*) element;
647}
648
649static PyObject*
650element_deepcopy(ElementObject* self, PyObject* args)
651{
652 int i;
653 ElementObject* element;
654 PyObject* tag;
655 PyObject* attrib;
656 PyObject* text;
657 PyObject* tail;
658 PyObject* id;
659
660 PyObject* memo;
661 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
662 return NULL;
663
664 tag = deepcopy(self->tag, memo);
665 if (!tag)
666 return NULL;
667
668 if (self->extra) {
669 attrib = deepcopy(self->extra->attrib, memo);
670 if (!attrib) {
671 Py_DECREF(tag);
672 return NULL;
673 }
674 } else {
675 Py_INCREF(Py_None);
676 attrib = Py_None;
677 }
678
679 element = (ElementObject*) element_new(tag, attrib);
680
681 Py_DECREF(tag);
682 Py_DECREF(attrib);
683
684 if (!element)
685 return NULL;
686
687 text = deepcopy(JOIN_OBJ(self->text), memo);
688 if (!text)
689 goto error;
690 Py_DECREF(element->text);
691 element->text = JOIN_SET(text, JOIN_GET(self->text));
692
693 tail = deepcopy(JOIN_OBJ(self->tail), memo);
694 if (!tail)
695 goto error;
696 Py_DECREF(element->tail);
697 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
698
699 if (self->extra) {
700
701 if (element_resize(element, self->extra->length) < 0)
702 goto error;
703
704 for (i = 0; i < self->extra->length; i++) {
705 PyObject* child = deepcopy(self->extra->children[i], memo);
706 if (!child) {
707 element->extra->length = i;
708 goto error;
709 }
710 element->extra->children[i] = child;
711 }
712
713 element->extra->length = self->extra->length;
714
715 }
716
717 /* add object to memo dictionary (so deepcopy won't visit it again) */
718 id = PyInt_FromLong((Py_uintptr_t) self);
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000719 if (!id)
720 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000721
722 i = PyDict_SetItem(memo, id, (PyObject*) element);
723
724 Py_DECREF(id);
725
726 if (i < 0)
727 goto error;
728
729 return (PyObject*) element;
730
731 error:
732 Py_DECREF(element);
733 return NULL;
734}
735
736LOCAL(int)
737checkpath(PyObject* tag)
738{
Neal Norwitzc7074382006-06-12 02:06:17 +0000739 Py_ssize_t i;
740 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741
742 /* check if a tag contains an xpath character */
743
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000744#define PATHCHAR(ch) \
745 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000746
747#if defined(Py_USING_UNICODE)
748 if (PyUnicode_Check(tag)) {
749 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
750 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
751 if (p[i] == '{')
752 check = 0;
753 else if (p[i] == '}')
754 check = 1;
755 else if (check && PATHCHAR(p[i]))
756 return 1;
757 }
758 return 0;
759 }
760#endif
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000761 if (PyString_Check(tag)) {
762 char *p = PyString_AS_STRING(tag);
763 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 if (p[i] == '{')
765 check = 0;
766 else if (p[i] == '}')
767 check = 1;
768 else if (check && PATHCHAR(p[i]))
769 return 1;
770 }
771 return 0;
772 }
773
774 return 1; /* unknown type; might be path expression */
775}
776
777static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000778element_extend(ElementObject* self, PyObject* args)
779{
780 PyObject* seq;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300781 Py_ssize_t i;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000782
783 PyObject* seq_in;
784 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
785 return NULL;
786
787 seq = PySequence_Fast(seq_in, "");
788 if (!seq) {
789 PyErr_Format(
790 PyExc_TypeError,
791 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
792 );
793 return NULL;
794 }
795
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300796 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000797 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
798 if (element_add_subelement(self, element) < 0) {
799 Py_DECREF(seq);
800 return NULL;
801 }
802 }
803
804 Py_DECREF(seq);
805
806 Py_RETURN_NONE;
807}
808
809static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000810element_find(ElementObject* self, PyObject* args)
811{
812 int i;
813
814 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000815 PyObject* namespaces = Py_None;
816 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000817 return NULL;
818
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000819 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000820 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000821 elementpath_obj, "find", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000822 );
823
824 if (!self->extra)
825 Py_RETURN_NONE;
826
827 for (i = 0; i < self->extra->length; i++) {
828 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300829 int rc;
830 if (!Element_CheckExact(item))
831 continue;
832 Py_INCREF(item);
833 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
834 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000835 return item;
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300836 Py_DECREF(item);
837 if (rc < 0 && PyErr_Occurred())
838 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000839 }
840
841 Py_RETURN_NONE;
842}
843
844static PyObject*
845element_findtext(ElementObject* self, PyObject* args)
846{
847 int i;
848
849 PyObject* tag;
850 PyObject* default_value = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000851 PyObject* namespaces = Py_None;
852 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000853 return NULL;
854
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000855 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000856 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000857 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000858 );
859
860 if (!self->extra) {
861 Py_INCREF(default_value);
862 return default_value;
863 }
864
865 for (i = 0; i < self->extra->length; i++) {
866 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300867 int rc;
868 if (!Element_CheckExact(item))
869 continue;
870 Py_INCREF(item);
871 rc = PyObject_Compare(item->tag, tag);
872 if (rc == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000873 PyObject* text = element_get_text(item);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300874 if (text == Py_None) {
875 Py_DECREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000876 return PyString_FromString("");
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300877 }
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000878 Py_XINCREF(text);
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300879 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000880 return text;
881 }
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300882 Py_DECREF(item);
883 if (rc < 0 && PyErr_Occurred())
884 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000885 }
886
887 Py_INCREF(default_value);
888 return default_value;
889}
890
891static PyObject*
892element_findall(ElementObject* self, PyObject* args)
893{
894 int i;
895 PyObject* out;
896
897 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000898 PyObject* namespaces = Py_None;
899 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000900 return NULL;
901
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000902 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000903 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000904 elementpath_obj, "findall", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000905 );
906
907 out = PyList_New(0);
908 if (!out)
909 return NULL;
910
911 if (!self->extra)
912 return out;
913
914 for (i = 0; i < self->extra->length; i++) {
915 PyObject* item = self->extra->children[i];
Serhiy Storchaka25598f32015-05-18 18:28:57 +0300916 int rc;
917 if (!Element_CheckExact(item))
918 continue;
919 Py_INCREF(item);
920 rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
921 if (rc == 0)
922 rc = PyList_Append(out, item);
923 Py_DECREF(item);
924 if (rc < 0 && PyErr_Occurred()) {
925 Py_DECREF(out);
926 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000927 }
928 }
929
930 return out;
931}
932
933static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000934element_iterfind(ElementObject* self, PyObject* args)
935{
936 PyObject* tag;
937 PyObject* namespaces = Py_None;
938 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
939 return NULL;
940
941 return PyObject_CallMethod(
942 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
943 );
944}
945
946static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000947element_get(ElementObject* self, PyObject* args)
948{
949 PyObject* value;
950
951 PyObject* key;
952 PyObject* default_value = Py_None;
953 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
954 return NULL;
955
956 if (!self->extra || self->extra->attrib == Py_None)
957 value = default_value;
958 else {
959 value = PyDict_GetItem(self->extra->attrib, key);
960 if (!value)
961 value = default_value;
962 }
963
964 Py_INCREF(value);
965 return value;
966}
967
968static PyObject*
969element_getchildren(ElementObject* self, PyObject* args)
970{
971 int i;
972 PyObject* list;
973
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000974 /* FIXME: report as deprecated? */
975
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000976 if (!PyArg_ParseTuple(args, ":getchildren"))
977 return NULL;
978
979 if (!self->extra)
980 return PyList_New(0);
981
982 list = PyList_New(self->extra->length);
983 if (!list)
984 return NULL;
985
986 for (i = 0; i < self->extra->length; i++) {
987 PyObject* item = self->extra->children[i];
988 Py_INCREF(item);
989 PyList_SET_ITEM(list, i, item);
990 }
991
992 return list;
993}
994
995static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000996element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000997{
998 PyObject* result;
999
1000 PyObject* tag = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001001 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001002 return NULL;
1003
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001004 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001005 PyErr_SetString(
1006 PyExc_RuntimeError,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001007 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001008 );
1009 return NULL;
1010 }
1011
1012 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001013 if (!args)
1014 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001015
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001016 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1017 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1018
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001019 result = PyObject_CallObject(elementtree_iter_obj, args);
1020
1021 Py_DECREF(args);
1022
1023 return result;
1024}
1025
1026
1027static PyObject*
1028element_itertext(ElementObject* self, PyObject* args)
1029{
1030 PyObject* result;
1031
1032 if (!PyArg_ParseTuple(args, ":itertext"))
1033 return NULL;
1034
1035 if (!elementtree_itertext_obj) {
1036 PyErr_SetString(
1037 PyExc_RuntimeError,
1038 "itertext helper not found"
1039 );
1040 return NULL;
1041 }
1042
1043 args = PyTuple_New(1);
1044 if (!args)
1045 return NULL;
1046
1047 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1048
1049 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001050
1051 Py_DECREF(args);
1052
1053 return result;
1054}
1055
1056static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001057element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001058{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001059 ElementObject* self = (ElementObject*) self_;
1060
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001061 if (!self->extra || index < 0 || index >= self->extra->length) {
1062 PyErr_SetString(
1063 PyExc_IndexError,
1064 "child index out of range"
1065 );
1066 return NULL;
1067 }
1068
1069 Py_INCREF(self->extra->children[index]);
1070 return self->extra->children[index];
1071}
1072
1073static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001074element_insert(ElementObject* self, PyObject* args)
1075{
1076 int i;
1077
1078 int index;
1079 PyObject* element;
1080 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1081 &Element_Type, &element))
1082 return NULL;
1083
1084 if (!self->extra)
1085 element_new_extra(self, NULL);
1086
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001087 if (index < 0) {
1088 index += self->extra->length;
1089 if (index < 0)
1090 index = 0;
1091 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001092 if (index > self->extra->length)
1093 index = self->extra->length;
1094
1095 if (element_resize(self, 1) < 0)
1096 return NULL;
1097
1098 for (i = self->extra->length; i > index; i--)
1099 self->extra->children[i] = self->extra->children[i-1];
1100
1101 Py_INCREF(element);
1102 self->extra->children[index] = element;
1103
1104 self->extra->length++;
1105
1106 Py_RETURN_NONE;
1107}
1108
1109static PyObject*
1110element_items(ElementObject* self, PyObject* args)
1111{
1112 if (!PyArg_ParseTuple(args, ":items"))
1113 return NULL;
1114
1115 if (!self->extra || self->extra->attrib == Py_None)
1116 return PyList_New(0);
1117
1118 return PyDict_Items(self->extra->attrib);
1119}
1120
1121static PyObject*
1122element_keys(ElementObject* self, PyObject* args)
1123{
1124 if (!PyArg_ParseTuple(args, ":keys"))
1125 return NULL;
1126
1127 if (!self->extra || self->extra->attrib == Py_None)
1128 return PyList_New(0);
1129
1130 return PyDict_Keys(self->extra->attrib);
1131}
1132
Martin v. Löwis18e16552006-02-15 17:27:45 +00001133static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001134element_length(ElementObject* self)
1135{
1136 if (!self->extra)
1137 return 0;
1138
1139 return self->extra->length;
1140}
1141
1142static PyObject*
1143element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1144{
1145 PyObject* elem;
1146
1147 PyObject* tag;
1148 PyObject* attrib;
1149 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1150 return NULL;
1151
1152 attrib = PyDict_Copy(attrib);
1153 if (!attrib)
1154 return NULL;
1155
1156 elem = element_new(tag, attrib);
1157
1158 Py_DECREF(attrib);
1159
1160 return elem;
1161}
1162
1163static PyObject*
1164element_reduce(ElementObject* self, PyObject* args)
1165{
1166 if (!PyArg_ParseTuple(args, ":__reduce__"))
1167 return NULL;
1168
1169 /* Hack alert: This method is used to work around a __copy__
1170 problem on certain 2.3 and 2.4 versions. To save time and
1171 simplify the code, we create the copy in here, and use a dummy
1172 copyelement helper to trick the copy module into doing the
1173 right thing. */
1174
1175 if (!elementtree_copyelement_obj) {
1176 PyErr_SetString(
1177 PyExc_RuntimeError,
1178 "copyelement helper not found"
1179 );
1180 return NULL;
1181 }
1182
1183 return Py_BuildValue(
1184 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1185 );
1186}
1187
1188static PyObject*
1189element_remove(ElementObject* self, PyObject* args)
1190{
1191 int i;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001192 int rc;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001193 PyObject* element;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001194 PyObject* found;
1195
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001196 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1197 return NULL;
1198
1199 if (!self->extra) {
1200 /* element has no children, so raise exception */
1201 PyErr_SetString(
1202 PyExc_ValueError,
1203 "list.remove(x): x not in list"
1204 );
1205 return NULL;
1206 }
1207
1208 for (i = 0; i < self->extra->length; i++) {
1209 if (self->extra->children[i] == element)
1210 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001211 rc = PyObject_Compare(self->extra->children[i], element);
1212 if (rc == 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001213 break;
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001214 if (rc < 0 && PyErr_Occurred())
1215 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001216 }
1217
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001218 if (i >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219 /* element is not in children, so raise exception */
1220 PyErr_SetString(
1221 PyExc_ValueError,
1222 "list.remove(x): x not in list"
1223 );
1224 return NULL;
1225 }
1226
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001227 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001228
1229 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001230 for (; i < self->extra->length; i++)
1231 self->extra->children[i] = self->extra->children[i+1];
1232
Serhiy Storchaka25598f32015-05-18 18:28:57 +03001233 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001234 Py_RETURN_NONE;
1235}
1236
1237static PyObject*
1238element_repr(ElementObject* self)
1239{
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001240 int status;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001241
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001242 if (self->tag == NULL)
1243 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001245 status = Py_ReprEnter((PyObject *)self);
1246 if (status == 0) {
1247 PyObject *repr, *tag;
1248 tag = PyObject_Repr(self->tag);
1249 if (!tag)
1250 return NULL;
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001251
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001252 repr = PyString_FromFormat("<Element %s at %p>",
1253 PyString_AS_STRING(tag), self);
Benjamin Petersond7324bc2016-12-03 11:30:04 -08001254 Py_ReprLeave((PyObject *)self);
Serhiy Storchaka1f7586e2016-06-12 10:06:32 +03001255 Py_DECREF(tag);
1256 return repr;
1257 }
1258 if (status > 0)
1259 PyErr_Format(PyExc_RuntimeError,
1260 "reentrant call inside %s.__repr__",
1261 Py_TYPE(self)->tp_name);
1262 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001263}
1264
1265static PyObject*
1266element_set(ElementObject* self, PyObject* args)
1267{
1268 PyObject* attrib;
1269
1270 PyObject* key;
1271 PyObject* value;
1272 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1273 return NULL;
1274
1275 if (!self->extra)
1276 element_new_extra(self, NULL);
1277
1278 attrib = element_get_attrib(self);
1279 if (!attrib)
1280 return NULL;
1281
1282 if (PyDict_SetItem(attrib, key, value) < 0)
1283 return NULL;
1284
1285 Py_RETURN_NONE;
1286}
1287
1288static int
Serhiy Storchakab5b76c32015-11-26 11:21:47 +02001289element_setitem(PyObject* self_, Py_ssize_t index_, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001290{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001291 ElementObject* self = (ElementObject*) self_;
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001292 int i, index;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001293 PyObject* old;
1294
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001295 if (!self->extra || index_ < 0 || index_ >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001296 PyErr_SetString(
1297 PyExc_IndexError,
1298 "child assignment index out of range");
1299 return -1;
1300 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001301 index = (int)index_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001302
1303 old = self->extra->children[index];
1304
1305 if (item) {
1306 Py_INCREF(item);
1307 self->extra->children[index] = item;
1308 } else {
1309 self->extra->length--;
1310 for (i = index; i < self->extra->length; i++)
1311 self->extra->children[i] = self->extra->children[i+1];
1312 }
1313
1314 Py_DECREF(old);
1315
1316 return 0;
1317}
1318
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001319static PyObject*
1320element_subscr(PyObject* self_, PyObject* item)
1321{
1322 ElementObject* self = (ElementObject*) self_;
1323
1324#if (PY_VERSION_HEX < 0x02050000)
1325 if (PyInt_Check(item) || PyLong_Check(item)) {
1326 long i = PyInt_AsLong(item);
1327#else
1328 if (PyIndex_Check(item)) {
1329 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1330#endif
1331
1332 if (i == -1 && PyErr_Occurred()) {
1333 return NULL;
1334 }
1335 if (i < 0 && self->extra)
1336 i += self->extra->length;
1337 return element_getitem(self_, i);
1338 }
1339 else if (PySlice_Check(item)) {
1340 Py_ssize_t start, stop, step, slicelen, cur, i;
1341 PyObject* list;
1342
1343 if (!self->extra)
1344 return PyList_New(0);
1345
1346 if (PySlice_GetIndicesEx((PySliceObject *)item,
1347 self->extra->length,
1348 &start, &stop, &step, &slicelen) < 0) {
1349 return NULL;
1350 }
1351
1352 if (slicelen <= 0)
1353 return PyList_New(0);
1354 else {
1355 list = PyList_New(slicelen);
1356 if (!list)
1357 return NULL;
1358
1359 for (cur = start, i = 0; i < slicelen;
1360 cur += step, i++) {
1361 PyObject* item = self->extra->children[cur];
1362 Py_INCREF(item);
1363 PyList_SET_ITEM(list, i, item);
1364 }
1365
1366 return list;
1367 }
1368 }
1369 else {
1370 PyErr_SetString(PyExc_TypeError,
1371 "element indices must be integers");
1372 return NULL;
1373 }
1374}
1375
1376static int
1377element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1378{
1379 ElementObject* self = (ElementObject*) self_;
1380
1381#if (PY_VERSION_HEX < 0x02050000)
1382 if (PyInt_Check(item) || PyLong_Check(item)) {
1383 long i = PyInt_AsLong(item);
1384#else
1385 if (PyIndex_Check(item)) {
1386 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1387#endif
1388
1389 if (i == -1 && PyErr_Occurred()) {
1390 return -1;
1391 }
1392 if (i < 0 && self->extra)
1393 i += self->extra->length;
1394 return element_setitem(self_, i, value);
1395 }
1396 else if (PySlice_Check(item)) {
1397 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1398
1399 PyObject* recycle = NULL;
1400 PyObject* seq = NULL;
1401
1402 if (!self->extra)
1403 element_new_extra(self, NULL);
1404
1405 if (PySlice_GetIndicesEx((PySliceObject *)item,
1406 self->extra->length,
1407 &start, &stop, &step, &slicelen) < 0) {
1408 return -1;
1409 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001410 assert(slicelen <= self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001411
1412 if (value == NULL)
1413 newlen = 0;
1414 else {
1415 seq = PySequence_Fast(value, "");
1416 if (!seq) {
1417 PyErr_Format(
1418 PyExc_TypeError,
1419 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1420 );
1421 return -1;
1422 }
1423 newlen = PySequence_Size(seq);
1424 }
1425
1426 if (step != 1 && newlen != slicelen)
1427 {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001428 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001429 PyErr_Format(PyExc_ValueError,
1430#if (PY_VERSION_HEX < 0x02050000)
1431 "attempt to assign sequence of size %d "
1432 "to extended slice of size %d",
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001433 (int)newlen, (int)slicelen
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001434#else
1435 "attempt to assign sequence of size %zd "
1436 "to extended slice of size %zd",
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001437 newlen, slicelen
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001438#endif
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001439 );
1440 return -1;
1441 }
1442
1443
1444 /* Resize before creating the recycle bin, to prevent refleaks. */
1445 if (newlen > slicelen) {
1446 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001447 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001448 return -1;
1449 }
1450 }
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001451 assert(newlen - slicelen <= INT_MAX - self->extra->length);
1452 assert(newlen - slicelen >= -self->extra->length);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001453
1454 if (slicelen > 0) {
1455 /* to avoid recursive calls to this method (via decref), move
1456 old items to the recycle bin here, and get rid of them when
1457 we're done modifying the element */
1458 recycle = PyList_New(slicelen);
1459 if (!recycle) {
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001460 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001461 return -1;
1462 }
1463 for (cur = start, i = 0; i < slicelen;
1464 cur += step, i++)
1465 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1466 }
1467
1468 if (newlen < slicelen) {
1469 /* delete slice */
1470 for (i = stop; i < self->extra->length; i++)
1471 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1472 } else if (newlen > slicelen) {
1473 /* insert slice */
1474 for (i = self->extra->length-1; i >= stop; i--)
1475 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1476 }
1477
1478 /* replace the slice */
1479 for (cur = start, i = 0; i < newlen;
1480 cur += step, i++) {
1481 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1482 Py_INCREF(element);
1483 self->extra->children[cur] = element;
1484 }
1485
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02001486 self->extra->length += (int)(newlen - slicelen);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001487
Serhiy Storchakaa0ae9ff2015-11-22 12:31:11 +02001488 Py_XDECREF(seq);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001489
1490 /* discard the recycle bin, and everything in it */
1491 Py_XDECREF(recycle);
1492
1493 return 0;
1494 }
1495 else {
1496 PyErr_SetString(PyExc_TypeError,
1497 "element indices must be integers");
1498 return -1;
1499 }
1500}
1501
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001502static PyMethodDef element_methods[] = {
1503
1504 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1505
1506 {"get", (PyCFunction) element_get, METH_VARARGS},
1507 {"set", (PyCFunction) element_set, METH_VARARGS},
1508
1509 {"find", (PyCFunction) element_find, METH_VARARGS},
1510 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1511 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1512
1513 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001514 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001515 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1516 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1517
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001518 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1519 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1520 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1521
1522 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001523 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1524
1525 {"items", (PyCFunction) element_items, METH_VARARGS},
1526 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1527
1528 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1529
1530 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1531 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1532
1533 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1534 C objects correctly, so we have to fake it using a __reduce__-
1535 based hack (see the element_reduce implementation above for
1536 details). */
1537
1538 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1539 using a runtime test to figure out if we need to fake things
1540 or now (see the init code below). The following entry is
1541 enabled only if the hack is needed. */
1542
1543 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1544
1545 {NULL, NULL}
1546};
1547
1548static PyObject*
1549element_getattr(ElementObject* self, char* name)
1550{
1551 PyObject* res;
1552
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001553 /* handle common attributes first */
1554 if (strcmp(name, "tag") == 0) {
1555 res = self->tag;
1556 Py_INCREF(res);
1557 return res;
1558 } else if (strcmp(name, "text") == 0) {
1559 res = element_get_text(self);
1560 Py_INCREF(res);
1561 return res;
1562 }
1563
1564 /* methods */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001565 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1566 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001567 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001568
1569 PyErr_Clear();
1570
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001571 /* less common attributes */
1572 if (strcmp(name, "tail") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001573 res = element_get_tail(self);
1574 } else if (strcmp(name, "attrib") == 0) {
1575 if (!self->extra)
1576 element_new_extra(self, NULL);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001577 res = element_get_attrib(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001578 } else {
1579 PyErr_SetString(PyExc_AttributeError, name);
1580 return NULL;
1581 }
1582
1583 if (!res)
1584 return NULL;
1585
1586 Py_INCREF(res);
1587 return res;
1588}
1589
1590static int
1591element_setattr(ElementObject* self, const char* name, PyObject* value)
1592{
1593 if (value == NULL) {
1594 PyErr_SetString(
1595 PyExc_AttributeError,
1596 "can't delete element attributes"
1597 );
1598 return -1;
1599 }
1600
1601 if (strcmp(name, "tag") == 0) {
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001602 Py_INCREF(value);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001603 Py_SETREF(self->tag, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001604 } else if (strcmp(name, "text") == 0) {
1605 Py_DECREF(JOIN_OBJ(self->text));
1606 self->text = value;
1607 Py_INCREF(self->text);
1608 } else if (strcmp(name, "tail") == 0) {
1609 Py_DECREF(JOIN_OBJ(self->tail));
1610 self->tail = value;
1611 Py_INCREF(self->tail);
1612 } else if (strcmp(name, "attrib") == 0) {
1613 if (!self->extra)
1614 element_new_extra(self, NULL);
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001615 Py_INCREF(value);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001616 Py_SETREF(self->extra->attrib, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001617 } else {
1618 PyErr_SetString(PyExc_AttributeError, name);
1619 return -1;
1620 }
1621
1622 return 0;
1623}
1624
1625static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001626 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001627 0, /* sq_concat */
1628 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001629 element_getitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001630 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001631 element_setitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001632 0,
1633};
1634
1635static PyMappingMethods element_as_mapping = {
1636 (lenfunc) element_length,
1637 (binaryfunc) element_subscr,
1638 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001639};
1640
1641statichere PyTypeObject Element_Type = {
1642 PyObject_HEAD_INIT(NULL)
1643 0, "Element", sizeof(ElementObject), 0,
1644 /* methods */
1645 (destructor)element_dealloc, /* tp_dealloc */
1646 0, /* tp_print */
1647 (getattrfunc)element_getattr, /* tp_getattr */
1648 (setattrfunc)element_setattr, /* tp_setattr */
1649 0, /* tp_compare */
1650 (reprfunc)element_repr, /* tp_repr */
1651 0, /* tp_as_number */
1652 &element_as_sequence, /* tp_as_sequence */
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001653 &element_as_mapping, /* tp_as_mapping */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001654};
1655
1656/* ==================================================================== */
1657/* the tree builder type */
1658
1659typedef struct {
1660 PyObject_HEAD
1661
1662 PyObject* root; /* root node (first created node) */
1663
1664 ElementObject* this; /* current node */
1665 ElementObject* last; /* most recently created node */
1666
1667 PyObject* data; /* data collector (string or list), or NULL */
1668
1669 PyObject* stack; /* element stack */
Neal Norwitzc7074382006-06-12 02:06:17 +00001670 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001671
1672 /* element tracing */
1673 PyObject* events; /* list of events, or NULL if not collecting */
1674 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1675 PyObject* end_event_obj;
1676 PyObject* start_ns_event_obj;
1677 PyObject* end_ns_event_obj;
1678
1679} TreeBuilderObject;
1680
1681staticforward PyTypeObject TreeBuilder_Type;
1682
Christian Heimese93237d2007-12-19 02:37:44 +00001683#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001684
1685/* -------------------------------------------------------------------- */
1686/* constructor and destructor */
1687
1688LOCAL(PyObject*)
1689treebuilder_new(void)
1690{
1691 TreeBuilderObject* self;
1692
1693 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1694 if (self == NULL)
1695 return NULL;
1696
1697 self->root = NULL;
1698
1699 Py_INCREF(Py_None);
1700 self->this = (ElementObject*) Py_None;
1701
1702 Py_INCREF(Py_None);
1703 self->last = (ElementObject*) Py_None;
1704
1705 self->data = NULL;
1706
1707 self->stack = PyList_New(20);
1708 self->index = 0;
1709
1710 self->events = NULL;
1711 self->start_event_obj = self->end_event_obj = NULL;
1712 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1713
1714 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1715
1716 return (PyObject*) self;
1717}
1718
1719static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00001720treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001721{
1722 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1723 return NULL;
1724
1725 return treebuilder_new();
1726}
1727
1728static void
1729treebuilder_dealloc(TreeBuilderObject* self)
1730{
1731 Py_XDECREF(self->end_ns_event_obj);
1732 Py_XDECREF(self->start_ns_event_obj);
1733 Py_XDECREF(self->end_event_obj);
1734 Py_XDECREF(self->start_event_obj);
1735 Py_XDECREF(self->events);
1736 Py_DECREF(self->stack);
1737 Py_XDECREF(self->data);
1738 Py_DECREF(self->last);
1739 Py_DECREF(self->this);
1740 Py_XDECREF(self->root);
1741
1742 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1743
1744 PyObject_Del(self);
1745}
1746
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001747LOCAL(int)
1748treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
1749 PyObject *node)
1750{
1751 if (action != NULL) {
1752 PyObject *res = PyTuple_Pack(2, action, node);
1753 if (res == NULL)
1754 return -1;
1755 if (PyList_Append(self->events, res) < 0) {
1756 Py_DECREF(res);
1757 return -1;
1758 }
1759 Py_DECREF(res);
1760 }
1761 return 0;
1762}
1763
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001764/* -------------------------------------------------------------------- */
1765/* handlers */
1766
1767LOCAL(PyObject*)
1768treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1769 PyObject* standalone)
1770{
1771 Py_RETURN_NONE;
1772}
1773
1774LOCAL(PyObject*)
1775treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1776 PyObject* attrib)
1777{
1778 PyObject* node;
1779 PyObject* this;
1780
1781 if (self->data) {
1782 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001783 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001784 self->last->text = JOIN_SET(
1785 self->data, PyList_CheckExact(self->data)
1786 );
1787 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001788 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001789 self->last->tail = JOIN_SET(
1790 self->data, PyList_CheckExact(self->data)
1791 );
1792 }
1793 self->data = NULL;
1794 }
1795
1796 node = element_new(tag, attrib);
1797 if (!node)
1798 return NULL;
1799
1800 this = (PyObject*) self->this;
1801
1802 if (this != Py_None) {
1803 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001804 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001805 } else {
1806 if (self->root) {
1807 PyErr_SetString(
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001808 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001809 "multiple elements on top level"
1810 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001811 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001812 }
1813 Py_INCREF(node);
1814 self->root = node;
1815 }
1816
1817 if (self->index < PyList_GET_SIZE(self->stack)) {
1818 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001819 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001820 Py_INCREF(this);
1821 } else {
1822 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001823 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001824 }
1825 self->index++;
1826
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001827 Py_INCREF(node);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001828 Py_SETREF(self->this, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001829
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001830 Py_INCREF(node);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001831 Py_SETREF(self->last, (ElementObject*) node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001832
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001833 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
1834 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001835
1836 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001837
1838 error:
1839 Py_DECREF(node);
1840 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001841}
1842
1843LOCAL(PyObject*)
1844treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1845{
1846 if (!self->data) {
Fredrik Lundhdc075b92006-08-16 16:47:07 +00001847 if (self->last == (ElementObject*) Py_None) {
1848 /* ignore calls to data before the first call to start */
1849 Py_RETURN_NONE;
1850 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001851 /* store the first item as is */
1852 Py_INCREF(data); self->data = data;
1853 } else {
1854 /* more than one item; use a list to collect items */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001855 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1856 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001857 /* expat often generates single character data sections; handle
1858 the most common case by resizing the existing string... */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001859 Py_ssize_t size = PyString_GET_SIZE(self->data);
1860 if (_PyString_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001861 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001862 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001863 } else if (PyList_CheckExact(self->data)) {
1864 if (PyList_Append(self->data, data) < 0)
1865 return NULL;
1866 } else {
1867 PyObject* list = PyList_New(2);
1868 if (!list)
1869 return NULL;
1870 PyList_SET_ITEM(list, 0, self->data);
1871 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1872 self->data = list;
1873 }
1874 }
1875
1876 Py_RETURN_NONE;
1877}
1878
1879LOCAL(PyObject*)
1880treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1881{
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001882 ElementObject *item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001883
1884 if (self->data) {
1885 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001886 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001887 self->last->text = JOIN_SET(
1888 self->data, PyList_CheckExact(self->data)
1889 );
1890 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001891 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001892 self->last->tail = JOIN_SET(
1893 self->data, PyList_CheckExact(self->data)
1894 );
1895 }
1896 self->data = NULL;
1897 }
1898
1899 if (self->index == 0) {
1900 PyErr_SetString(
1901 PyExc_IndexError,
1902 "pop from empty stack"
1903 );
1904 return NULL;
1905 }
1906
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001907 item = self->last;
1908 self->last = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001909 self->index--;
Serhiy Storchaka2e6c8292015-12-27 15:41:58 +02001910 self->this = (ElementObject *) PyList_GET_ITEM(self->stack, self->index);
1911 Py_INCREF(self->this);
1912 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001913
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02001914 if (treebuilder_append_event(self, self->end_event_obj, (PyObject*)self->last) < 0)
1915 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001916
1917 Py_INCREF(self->last);
1918 return (PyObject*) self->last;
1919}
1920
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001921/* -------------------------------------------------------------------- */
1922/* methods (in alphabetical order) */
1923
1924static PyObject*
1925treebuilder_data(TreeBuilderObject* self, PyObject* args)
1926{
1927 PyObject* data;
1928 if (!PyArg_ParseTuple(args, "O:data", &data))
1929 return NULL;
1930
1931 return treebuilder_handle_data(self, data);
1932}
1933
1934static PyObject*
1935treebuilder_end(TreeBuilderObject* self, PyObject* args)
1936{
1937 PyObject* tag;
1938 if (!PyArg_ParseTuple(args, "O:end", &tag))
1939 return NULL;
1940
1941 return treebuilder_handle_end(self, tag);
1942}
1943
1944LOCAL(PyObject*)
1945treebuilder_done(TreeBuilderObject* self)
1946{
1947 PyObject* res;
1948
1949 /* FIXME: check stack size? */
1950
1951 if (self->root)
1952 res = self->root;
1953 else
1954 res = Py_None;
1955
1956 Py_INCREF(res);
1957 return res;
1958}
1959
1960static PyObject*
1961treebuilder_close(TreeBuilderObject* self, PyObject* args)
1962{
1963 if (!PyArg_ParseTuple(args, ":close"))
1964 return NULL;
1965
1966 return treebuilder_done(self);
1967}
1968
1969static PyObject*
1970treebuilder_start(TreeBuilderObject* self, PyObject* args)
1971{
1972 PyObject* tag;
1973 PyObject* attrib = Py_None;
1974 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1975 return NULL;
1976
1977 return treebuilder_handle_start(self, tag, attrib);
1978}
1979
1980static PyObject*
1981treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1982{
1983 PyObject* encoding;
1984 PyObject* standalone;
1985 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1986 return NULL;
1987
1988 return treebuilder_handle_xml(self, encoding, standalone);
1989}
1990
1991static PyMethodDef treebuilder_methods[] = {
1992 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1993 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1994 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1995 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1996 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1997 {NULL, NULL}
1998};
1999
2000static PyObject*
2001treebuilder_getattr(TreeBuilderObject* self, char* name)
2002{
2003 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
2004}
2005
2006statichere PyTypeObject TreeBuilder_Type = {
2007 PyObject_HEAD_INIT(NULL)
2008 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
2009 /* methods */
2010 (destructor)treebuilder_dealloc, /* tp_dealloc */
2011 0, /* tp_print */
2012 (getattrfunc)treebuilder_getattr, /* tp_getattr */
2013};
2014
2015/* ==================================================================== */
2016/* the expat interface */
2017
2018#if defined(USE_EXPAT)
2019
2020#include "expat.h"
2021
/* EXPAT(func) names an expat entry point.  With USE_PYEXPAT_CAPI the call
   goes through the expat_capi dispatch table; otherwise it expands to
   the XML_<func> symbol directly. */
#ifdef USE_PYEXPAT_CAPI
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
2029
2030typedef struct {
2031 PyObject_HEAD
2032
2033 XML_Parser parser;
2034
2035 PyObject* target;
2036 PyObject* entity;
2037
2038 PyObject* names;
2039
2040 PyObject* handle_xml;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002041
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002042 PyObject* handle_start;
2043 PyObject* handle_data;
2044 PyObject* handle_end;
2045
2046 PyObject* handle_comment;
2047 PyObject* handle_pi;
2048
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002049 PyObject* handle_close;
2050
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002051} XMLParserObject;
2052
2053staticforward PyTypeObject XMLParser_Type;
2054
2055/* helpers */
2056
#if defined(Py_USING_UNICODE)
/* Return 1 when any of the first `size` bytes of `string` has its high
   bit set, i.e. the 8-bit string is not plain 7-bit data; 0 otherwise
   (including when `size` is zero or negative). */
LOCAL(int)
checkstring(const char* string, int size)
{
    int remaining;

    for (remaining = size; remaining > 0; remaining--, string++) {
        if (*string & 0x80)
            return 1;
    }

    return 0;
}
#endif
2071
2072LOCAL(PyObject*)
2073makestring(const char* string, int size)
2074{
2075 /* convert a UTF-8 string to either a 7-bit ascii string or a
2076 Unicode string */
2077
2078#if defined(Py_USING_UNICODE)
2079 if (checkstring(string, size))
2080 return PyUnicode_DecodeUTF8(string, size, "strict");
2081#endif
2082
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002083 return PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002084}
2085
2086LOCAL(PyObject*)
2087makeuniversal(XMLParserObject* self, const char* string)
2088{
2089 /* convert a UTF-8 tag/attribute name from the expat parser
2090 to a universal name string */
2091
2092 int size = strlen(string);
2093 PyObject* key;
2094 PyObject* value;
2095
2096 /* look the 'raw' name up in the names dictionary */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002097 key = PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002098 if (!key)
2099 return NULL;
2100
2101 value = PyDict_GetItem(self->names, key);
2102
2103 if (value) {
2104 Py_INCREF(value);
2105 } else {
2106 /* new name. convert to universal name, and decode as
2107 necessary */
2108
2109 PyObject* tag;
2110 char* p;
2111 int i;
2112
2113 /* look for namespace separator */
2114 for (i = 0; i < size; i++)
2115 if (string[i] == '}')
2116 break;
2117 if (i != size) {
2118 /* convert to universal name */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002119 tag = PyString_FromStringAndSize(NULL, size+1);
2120 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002121 p[0] = '{';
2122 memcpy(p+1, string, size);
2123 size++;
2124 } else {
2125 /* plain name; use key as tag */
2126 Py_INCREF(key);
2127 tag = key;
2128 }
2129
2130 /* decode universal name */
2131#if defined(Py_USING_UNICODE)
2132 /* inline makestring, to avoid duplicating the source string if
Martin Panter6a8163a2016-04-15 02:14:19 +00002133 it's not a utf-8 string */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002134 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002135 if (checkstring(p, size)) {
2136 value = PyUnicode_DecodeUTF8(p, size, "strict");
2137 Py_DECREF(tag);
2138 if (!value) {
2139 Py_DECREF(key);
2140 return NULL;
2141 }
2142 } else
2143#endif
2144 value = tag; /* use tag as is */
2145
2146 /* add to names dictionary */
2147 if (PyDict_SetItem(self->names, key, value) < 0) {
2148 Py_DECREF(key);
2149 Py_DECREF(value);
2150 return NULL;
2151 }
2152 }
2153
2154 Py_DECREF(key);
2155 return value;
2156}
2157
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002158static void
2159expat_set_error(const char* message, int line, int column)
2160{
2161 PyObject *error;
2162 PyObject *position;
2163 char buffer[256];
2164
2165 sprintf(buffer, "%s: line %d, column %d", message, line, column);
2166
2167 error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
2168 if (!error)
2169 return;
2170
2171 /* add position attribute */
2172 position = Py_BuildValue("(ii)", line, column);
2173 if (!position) {
2174 Py_DECREF(error);
2175 return;
2176 }
2177 if (PyObject_SetAttrString(error, "position", position) == -1) {
2178 Py_DECREF(error);
2179 Py_DECREF(position);
2180 return;
2181 }
2182 Py_DECREF(position);
2183
2184 PyErr_SetObject(elementtree_parseerror_obj, error);
2185 Py_DECREF(error);
2186}
2187
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002188/* -------------------------------------------------------------------- */
2189/* handlers */
2190
2191static void
2192expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2193 int data_len)
2194{
2195 PyObject* key;
2196 PyObject* value;
2197 PyObject* res;
2198
2199 if (data_len < 2 || data_in[0] != '&')
2200 return;
2201
2202 key = makestring(data_in + 1, data_len - 2);
2203 if (!key)
2204 return;
2205
2206 value = PyDict_GetItem(self->entity, key);
2207
2208 if (value) {
2209 if (TreeBuilder_CheckExact(self->target))
2210 res = treebuilder_handle_data(
2211 (TreeBuilderObject*) self->target, value
2212 );
2213 else if (self->handle_data)
2214 res = PyObject_CallFunction(self->handle_data, "O", value);
2215 else
2216 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002217 Py_XDECREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002218 } else if (!PyErr_Occurred()) {
2219 /* Report the first error, not the last */
2220 char message[128];
2221 sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key));
2222 expat_set_error(
2223 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002224 EXPAT(GetErrorLineNumber)(self->parser),
2225 EXPAT(GetErrorColumnNumber)(self->parser)
2226 );
2227 }
2228
2229 Py_DECREF(key);
2230}
2231
2232static void
2233expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2234 const XML_Char **attrib_in)
2235{
2236 PyObject* res;
2237 PyObject* tag;
2238 PyObject* attrib;
2239 int ok;
2240
2241 /* tag name */
2242 tag = makeuniversal(self, tag_in);
2243 if (!tag)
2244 return; /* parser will look for errors */
2245
2246 /* attributes */
2247 if (attrib_in[0]) {
2248 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002249 if (!attrib) {
2250 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002251 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002252 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002253 while (attrib_in[0] && attrib_in[1]) {
2254 PyObject* key = makeuniversal(self, attrib_in[0]);
2255 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2256 if (!key || !value) {
2257 Py_XDECREF(value);
2258 Py_XDECREF(key);
2259 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002260 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002261 return;
2262 }
2263 ok = PyDict_SetItem(attrib, key, value);
2264 Py_DECREF(value);
2265 Py_DECREF(key);
2266 if (ok < 0) {
2267 Py_DECREF(attrib);
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002268 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002269 return;
2270 }
2271 attrib_in += 2;
2272 }
2273 } else {
2274 Py_INCREF(Py_None);
2275 attrib = Py_None;
2276 }
2277
2278 if (TreeBuilder_CheckExact(self->target))
2279 /* shortcut */
2280 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2281 tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002282 else if (self->handle_start) {
2283 if (attrib == Py_None) {
2284 Py_DECREF(attrib);
2285 attrib = PyDict_New();
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002286 if (!attrib) {
2287 Py_DECREF(tag);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002288 return;
Serhiy Storchaka33ea2972015-12-09 19:44:30 +02002289 }
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002290 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002291 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002292 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002293 res = NULL;
2294
2295 Py_DECREF(tag);
2296 Py_DECREF(attrib);
2297
2298 Py_XDECREF(res);
2299}
2300
2301static void
2302expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2303 int data_len)
2304{
2305 PyObject* data;
2306 PyObject* res;
2307
2308 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002309 if (!data)
2310 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002311
2312 if (TreeBuilder_CheckExact(self->target))
2313 /* shortcut */
2314 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2315 else if (self->handle_data)
2316 res = PyObject_CallFunction(self->handle_data, "O", data);
2317 else
2318 res = NULL;
2319
2320 Py_DECREF(data);
2321
2322 Py_XDECREF(res);
2323}
2324
2325static void
2326expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2327{
2328 PyObject* tag;
2329 PyObject* res = NULL;
2330
2331 if (TreeBuilder_CheckExact(self->target))
2332 /* shortcut */
2333 /* the standard tree builder doesn't look at the end tag */
2334 res = treebuilder_handle_end(
2335 (TreeBuilderObject*) self->target, Py_None
2336 );
2337 else if (self->handle_end) {
2338 tag = makeuniversal(self, tag_in);
2339 if (tag) {
2340 res = PyObject_CallFunction(self->handle_end, "O", tag);
2341 Py_DECREF(tag);
2342 }
2343 }
2344
2345 Py_XDECREF(res);
2346}
2347
2348static void
2349expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2350 const XML_Char *uri)
2351{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002352 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2353 PyObject *parcel;
2354 PyObject *sprefix = NULL;
2355 PyObject *suri = NULL;
2356
2357 if (PyErr_Occurred())
2358 return;
2359
2360 if (!target->events || !target->start_ns_event_obj)
2361 return;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002362
Eli Benderskyf933e082013-11-28 06:25:45 -08002363 if (uri)
Eli Bendersky71142c42013-11-28 06:37:25 -08002364 suri = makestring(uri, strlen(uri));
Eli Benderskyf933e082013-11-28 06:25:45 -08002365 else
Eli Bendersky71142c42013-11-28 06:37:25 -08002366 suri = PyString_FromStringAndSize("", 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002367 if (!suri)
2368 return;
2369
2370 if (prefix)
2371 sprefix = makestring(prefix, strlen(prefix));
2372 else
2373 sprefix = PyString_FromStringAndSize("", 0);
2374 if (!sprefix) {
2375 Py_DECREF(suri);
2376 return;
2377 }
2378
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002379 parcel = PyTuple_Pack(2, sprefix, suri);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002380 Py_DECREF(sprefix);
2381 Py_DECREF(suri);
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002382 if (!parcel)
2383 return;
2384 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
2385 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002386}
2387
2388static void
2389expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2390{
Serhiy Storchaka45cf0b72015-12-06 23:51:53 +02002391 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2392
2393 if (PyErr_Occurred())
2394 return;
2395
2396 if (!target->events)
2397 return;
2398
2399 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002400}
2401
2402static void
2403expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2404{
2405 PyObject* comment;
2406 PyObject* res;
2407
2408 if (self->handle_comment) {
2409 comment = makestring(comment_in, strlen(comment_in));
2410 if (comment) {
2411 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2412 Py_XDECREF(res);
2413 Py_DECREF(comment);
2414 }
2415 }
2416}
2417
2418static void
2419expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2420 const XML_Char* data_in)
2421{
2422 PyObject* target;
2423 PyObject* data;
2424 PyObject* res;
2425
2426 if (self->handle_pi) {
2427 target = makestring(target_in, strlen(target_in));
2428 data = makestring(data_in, strlen(data_in));
2429 if (target && data) {
2430 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2431 Py_XDECREF(res);
2432 Py_DECREF(data);
2433 Py_DECREF(target);
2434 } else {
2435 Py_XDECREF(data);
2436 Py_XDECREF(target);
2437 }
2438 }
2439}
2440
#if defined(Py_USING_UNICODE)
/* expat unknown-encoding handler.  Decodes the 256 single-byte values
   0..255 with the named codec (error mode "replace") and fills
   info->map from the result; bytes that decode to the replacement
   character are mapped to -1.  Codecs that do not yield exactly one
   character per byte are rejected with ValueError.  Returns
   XML_STATUS_OK on success and XML_STATUS_ERROR otherwise. */
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    PyObject* decoded;
    Py_UNICODE* chars;
    unsigned char all_bytes[256];
    int byte;

    memset(info, 0, sizeof(XML_Encoding));

    /* one of every possible byte value, in order */
    for (byte = 0; byte < 256; byte++)
        all_bytes[byte] = byte;

    decoded = PyUnicode_Decode((char*) all_bytes, 256, name, "replace");
    if (!decoded)
        return XML_STATUS_ERROR;

    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        PyErr_SetString(PyExc_ValueError,
                        "multi-byte encodings are not supported");
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (byte = 0; byte < 256; byte++) {
        if (chars[byte] == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[byte] = -1;
        else
            info->map[byte] = chars[byte];
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2481
2482/* -------------------------------------------------------------------- */
2483/* constructor and destructor */
2484
2485static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00002486xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002487{
2488 XMLParserObject* self;
2489 /* FIXME: does this need to be static? */
2490 static XML_Memory_Handling_Suite memory_handler;
2491
2492 PyObject* target = NULL;
2493 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002494 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002495 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2496 &target, &encoding))
2497 return NULL;
2498
2499#if defined(USE_PYEXPAT_CAPI)
2500 if (!expat_capi) {
2501 PyErr_SetString(
2502 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2503 );
2504 return NULL;
2505 }
2506#endif
2507
2508 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2509 if (self == NULL)
2510 return NULL;
2511
2512 self->entity = PyDict_New();
2513 if (!self->entity) {
2514 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002515 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002516 }
2517
2518 self->names = PyDict_New();
2519 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002520 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002522 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002523 }
2524
2525 memory_handler.malloc_fcn = PyObject_Malloc;
2526 memory_handler.realloc_fcn = PyObject_Realloc;
2527 memory_handler.free_fcn = PyObject_Free;
2528
2529 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2530 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002531 PyObject_Del(self->names);
2532 PyObject_Del(self->entity);
2533 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002534 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002535 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002536 }
2537
2538 /* setup target handlers */
2539 if (!target) {
2540 target = treebuilder_new();
2541 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002542 EXPAT(ParserFree)(self->parser);
2543 PyObject_Del(self->names);
2544 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002545 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002546 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002547 }
2548 } else
2549 Py_INCREF(target);
2550 self->target = target;
2551
2552 self->handle_xml = PyObject_GetAttrString(target, "xml");
2553 self->handle_start = PyObject_GetAttrString(target, "start");
2554 self->handle_data = PyObject_GetAttrString(target, "data");
2555 self->handle_end = PyObject_GetAttrString(target, "end");
2556 self->handle_comment = PyObject_GetAttrString(target, "comment");
2557 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002558 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002559
2560 PyErr_Clear();
2561
2562 /* configure parser */
2563 EXPAT(SetUserData)(self->parser, self);
2564 EXPAT(SetElementHandler)(
2565 self->parser,
2566 (XML_StartElementHandler) expat_start_handler,
2567 (XML_EndElementHandler) expat_end_handler
2568 );
2569 EXPAT(SetDefaultHandlerExpand)(
2570 self->parser,
2571 (XML_DefaultHandler) expat_default_handler
2572 );
2573 EXPAT(SetCharacterDataHandler)(
2574 self->parser,
2575 (XML_CharacterDataHandler) expat_data_handler
2576 );
2577 if (self->handle_comment)
2578 EXPAT(SetCommentHandler)(
2579 self->parser,
2580 (XML_CommentHandler) expat_comment_handler
2581 );
2582 if (self->handle_pi)
2583 EXPAT(SetProcessingInstructionHandler)(
2584 self->parser,
2585 (XML_ProcessingInstructionHandler) expat_pi_handler
2586 );
2587#if defined(Py_USING_UNICODE)
2588 EXPAT(SetUnknownEncodingHandler)(
2589 self->parser,
2590 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2591 );
2592#endif
2593
2594 ALLOC(sizeof(XMLParserObject), "create expatparser");
2595
2596 return (PyObject*) self;
2597}
2598
2599static void
2600xmlparser_dealloc(XMLParserObject* self)
2601{
2602 EXPAT(ParserFree)(self->parser);
2603
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002604 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002605 Py_XDECREF(self->handle_pi);
2606 Py_XDECREF(self->handle_comment);
2607 Py_XDECREF(self->handle_end);
2608 Py_XDECREF(self->handle_data);
2609 Py_XDECREF(self->handle_start);
2610 Py_XDECREF(self->handle_xml);
2611
2612 Py_DECREF(self->target);
2613 Py_DECREF(self->entity);
2614 Py_DECREF(self->names);
2615
2616 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2617
2618 PyObject_Del(self);
2619}
2620
2621/* -------------------------------------------------------------------- */
2622/* methods (in alphabetical order) */
2623
2624LOCAL(PyObject*)
2625expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2626{
2627 int ok;
2628
2629 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2630
2631 if (PyErr_Occurred())
2632 return NULL;
2633
2634 if (!ok) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002635 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002636 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2637 EXPAT(GetErrorLineNumber)(self->parser),
2638 EXPAT(GetErrorColumnNumber)(self->parser)
2639 );
2640 return NULL;
2641 }
2642
2643 Py_RETURN_NONE;
2644}
2645
2646static PyObject*
2647xmlparser_close(XMLParserObject* self, PyObject* args)
2648{
2649 /* end feeding data to parser */
2650
2651 PyObject* res;
2652 if (!PyArg_ParseTuple(args, ":close"))
2653 return NULL;
2654
2655 res = expat_parse(self, "", 0, 1);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002656 if (!res)
2657 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002658
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002659 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002660 Py_DECREF(res);
2661 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002662 } if (self->handle_close) {
2663 Py_DECREF(res);
2664 return PyObject_CallFunction(self->handle_close, "");
2665 } else
2666 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002667}
2668
2669static PyObject*
2670xmlparser_feed(XMLParserObject* self, PyObject* args)
2671{
2672 /* feed data to parser */
2673
2674 char* data;
2675 int data_len;
2676 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2677 return NULL;
2678
2679 return expat_parse(self, data, data_len, 0);
2680}
2681
2682static PyObject*
2683xmlparser_parse(XMLParserObject* self, PyObject* args)
2684{
2685 /* (internal) parse until end of input stream */
2686
2687 PyObject* reader;
2688 PyObject* buffer;
2689 PyObject* res;
2690
2691 PyObject* fileobj;
2692 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2693 return NULL;
2694
2695 reader = PyObject_GetAttrString(fileobj, "read");
2696 if (!reader)
2697 return NULL;
2698
2699 /* read from open file object */
2700 for (;;) {
2701
2702 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2703
2704 if (!buffer) {
2705 /* read failed (e.g. due to KeyboardInterrupt) */
2706 Py_DECREF(reader);
2707 return NULL;
2708 }
2709
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002710 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711 Py_DECREF(buffer);
2712 break;
2713 }
2714
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002715 if (PyString_GET_SIZE(buffer) > INT_MAX) {
2716 Py_DECREF(buffer);
2717 Py_DECREF(reader);
2718 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
2719 return NULL;
2720 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002721 res = expat_parse(
Serhiy Storchakac4c64be2015-11-25 20:12:58 +02002722 self, PyString_AS_STRING(buffer), (int)PyString_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002723 );
2724
2725 Py_DECREF(buffer);
2726
2727 if (!res) {
2728 Py_DECREF(reader);
2729 return NULL;
2730 }
2731 Py_DECREF(res);
2732
2733 }
2734
2735 Py_DECREF(reader);
2736
2737 res = expat_parse(self, "", 0, 1);
2738
2739 if (res && TreeBuilder_CheckExact(self->target)) {
2740 Py_DECREF(res);
2741 return treebuilder_done((TreeBuilderObject*) self->target);
2742 }
2743
2744 return res;
2745}
2746
2747static PyObject*
2748xmlparser_setevents(XMLParserObject* self, PyObject* args)
2749{
2750 /* activate element event reporting */
2751
Neal Norwitzc7074382006-06-12 02:06:17 +00002752 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753 TreeBuilderObject* target;
2754
2755 PyObject* events; /* event collector */
2756 PyObject* event_set = Py_None;
2757 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2758 &event_set))
2759 return NULL;
2760
2761 if (!TreeBuilder_CheckExact(self->target)) {
2762 PyErr_SetString(
2763 PyExc_TypeError,
2764 "event handling only supported for cElementTree.Treebuilder "
2765 "targets"
2766 );
2767 return NULL;
2768 }
2769
2770 target = (TreeBuilderObject*) self->target;
2771
2772 Py_INCREF(events);
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002773 Py_XSETREF(target->events, events);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002774
2775 /* clear out existing events */
Serhiy Storchaka98a97222014-02-09 13:14:04 +02002776 Py_CLEAR(target->start_event_obj);
2777 Py_CLEAR(target->end_event_obj);
2778 Py_CLEAR(target->start_ns_event_obj);
2779 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002780
2781 if (event_set == Py_None) {
2782 /* default is "end" only */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002783 target->end_event_obj = PyString_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002784 Py_RETURN_NONE;
2785 }
2786
2787 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2788 goto error;
2789
2790 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2791 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2792 char* event;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002793 if (!PyString_Check(item))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002794 goto error;
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002795 Py_INCREF(item);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002796 event = PyString_AS_STRING(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002797 if (strcmp(event, "start") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002798 Py_XSETREF(target->start_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002799 } else if (strcmp(event, "end") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002800 Py_XSETREF(target->end_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002801 } else if (strcmp(event, "start-ns") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002802 Py_XSETREF(target->start_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002803 EXPAT(SetNamespaceDeclHandler)(
2804 self->parser,
2805 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2806 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2807 );
2808 } else if (strcmp(event, "end-ns") == 0) {
Serhiy Storchakabc62af12016-04-06 09:51:18 +03002809 Py_XSETREF(target->end_ns_event_obj, item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002810 EXPAT(SetNamespaceDeclHandler)(
2811 self->parser,
2812 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2813 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2814 );
2815 } else {
Serhiy Storchaka20a003b2015-12-24 11:51:24 +02002816 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002817 PyErr_Format(
2818 PyExc_ValueError,
2819 "unknown event '%s'", event
2820 );
2821 return NULL;
2822 }
2823 }
2824
2825 Py_RETURN_NONE;
2826
2827 error:
2828 PyErr_SetString(
2829 PyExc_TypeError,
2830 "invalid event tuple"
2831 );
2832 return NULL;
2833}
2834
2835static PyMethodDef xmlparser_methods[] = {
2836 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2837 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2838 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2839 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2840 {NULL, NULL}
2841};
2842
2843static PyObject*
2844xmlparser_getattr(XMLParserObject* self, char* name)
2845{
2846 PyObject* res;
2847
2848 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2849 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002850 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002851
2852 PyErr_Clear();
2853
2854 if (strcmp(name, "entity") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002855 res = self->entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002856 else if (strcmp(name, "target") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002857 res = self->target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002858 else if (strcmp(name, "version") == 0) {
2859 char buffer[100];
2860 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2861 XML_MINOR_VERSION, XML_MICRO_VERSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002862 return PyString_FromString(buffer);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002863 } else {
2864 PyErr_SetString(PyExc_AttributeError, name);
2865 return NULL;
2866 }
2867
2868 Py_INCREF(res);
2869 return res;
2870}
2871
2872statichere PyTypeObject XMLParser_Type = {
2873 PyObject_HEAD_INIT(NULL)
2874 0, "XMLParser", sizeof(XMLParserObject), 0,
2875 /* methods */
2876 (destructor)xmlparser_dealloc, /* tp_dealloc */
2877 0, /* tp_print */
2878 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2879};
2880
2881#endif
2882
2883/* ==================================================================== */
2884/* python module interface */
2885
2886static PyMethodDef _functions[] = {
2887 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2888 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2889 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2890#if defined(USE_EXPAT)
2891 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2892 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2893#endif
2894 {NULL, NULL}
2895};
2896
2897DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002898init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002899{
2900 PyObject* m;
2901 PyObject* g;
2902 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002903
2904 /* Patch object type */
Christian Heimese93237d2007-12-19 02:37:44 +00002905 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002906#if defined(USE_EXPAT)
Christian Heimese93237d2007-12-19 02:37:44 +00002907 Py_TYPE(&XMLParser_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002908#endif
2909
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002910 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002911 if (!m)
2912 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002913
2914 /* python glue code */
2915
2916 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002917 if (!g)
2918 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002919
2920 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2921
2922 bootstrap = (
2923
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002924 "from copy import copy, deepcopy\n"
2925
2926 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002927 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002928 "except ImportError:\n"
2929 " import ElementTree\n"
2930 "ET = ElementTree\n"
2931 "del ElementTree\n"
2932
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002933 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002934
2935 "try:\n" /* check if copy works as is */
2936 " copy(cElementTree.Element('x'))\n"
2937 "except:\n"
2938 " def copyelement(elem):\n"
2939 " return elem\n"
2940
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002941 "class CommentProxy:\n"
2942 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002943 " element = cElementTree.Element(ET.Comment)\n"
2944 " element.text = text\n"
2945 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002946 " def __cmp__(self, other):\n"
2947 " return cmp(ET.Comment, other)\n"
2948 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002949
2950 "class ElementTree(ET.ElementTree):\n" /* public */
2951 " def parse(self, source, parser=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002952 " close_source = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002953 " if not hasattr(source, 'read'):\n"
2954 " source = open(source, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002955 " close_source = False\n"
2956 " try:\n"
2957 " if parser is not None:\n"
2958 " while 1:\n"
2959 " data = source.read(65536)\n"
2960 " if not data:\n"
2961 " break\n"
2962 " parser.feed(data)\n"
2963 " self._root = parser.close()\n"
2964 " else:\n"
2965 " parser = cElementTree.XMLParser()\n"
2966 " self._root = parser._parse(source)\n"
2967 " return self._root\n"
2968 " finally:\n"
2969 " if close_source:\n"
2970 " source.close()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002971 "cElementTree.ElementTree = ElementTree\n"
2972
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002973 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 " if tag == '*':\n"
2975 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976 " if tag is None or node.tag == tag:\n"
2977 " yield node\n"
2978 " for node in node:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002979 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002980 " yield node\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002981
2982 "def itertext(node):\n" /* helper */
2983 " if node.text:\n"
2984 " yield node.text\n"
2985 " for e in node:\n"
2986 " for s in e.itertext():\n"
2987 " yield s\n"
2988 " if e.tail:\n"
2989 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002990
2991 "def parse(source, parser=None):\n" /* public */
2992 " tree = ElementTree()\n"
2993 " tree.parse(source, parser)\n"
2994 " return tree\n"
2995 "cElementTree.parse = parse\n"
2996
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002997 "class iterparse(object):\n"
2998 " root = None\n"
2999 " def __init__(self, file, events=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02003000 " self._close_file = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003001 " if not hasattr(file, 'read'):\n"
3002 " file = open(file, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02003003 " self._close_file = True\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003004 " self._file = file\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003005 " self._events = []\n"
3006 " self._index = 0\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003007 " self._error = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003008 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003009 " b = cElementTree.TreeBuilder()\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003010 " self._parser = cElementTree.XMLParser(b)\n"
3011 " self._parser._setevents(self._events, events)\n"
3012 " def next(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003013 " while 1:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003014 " try:\n"
3015 " item = self._events[self._index]\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003016 " self._index += 1\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003017 " return item\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01003018 " except IndexError:\n"
3019 " pass\n"
3020 " if self._error:\n"
3021 " e = self._error\n"
3022 " self._error = None\n"
3023 " raise e\n"
3024 " if self._parser is None:\n"
3025 " self.root = self._root\n"
3026 " if self._close_file:\n"
3027 " self._file.close()\n"
3028 " raise StopIteration\n"
3029 " # load event buffer\n"
3030 " del self._events[:]\n"
3031 " self._index = 0\n"
3032 " data = self._file.read(16384)\n"
3033 " if data:\n"
3034 " try:\n"
3035 " self._parser.feed(data)\n"
3036 " except SyntaxError as exc:\n"
3037 " self._error = exc\n"
3038 " else:\n"
3039 " self._root = self._parser.close()\n"
3040 " self._parser = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003041 " def __iter__(self):\n"
3042 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003043 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003044
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003045 "class PIProxy:\n"
3046 " def __call__(self, target, text=None):\n"
3047 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003048 " element.text = target\n"
3049 " if text:\n"
3050 " element.text = element.text + ' ' + text\n"
3051 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003052 " def __cmp__(self, other):\n"
3053 " return cmp(ET.PI, other)\n"
3054 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003055
3056 "def XML(text):\n" /* public */
3057 " parser = cElementTree.XMLParser()\n"
3058 " parser.feed(text)\n"
3059 " return parser.close()\n"
3060 "cElementTree.XML = cElementTree.fromstring = XML\n"
3061
3062 "def XMLID(text):\n" /* public */
3063 " tree = XML(text)\n"
3064 " ids = {}\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003065 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003066 " id = elem.get('id')\n"
3067 " if id:\n"
3068 " ids[id] = elem\n"
3069 " return tree, ids\n"
3070 "cElementTree.XMLID = XMLID\n"
3071
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003072 "try:\n"
3073 " register_namespace = ET.register_namespace\n"
3074 "except AttributeError:\n"
3075 " def register_namespace(prefix, uri):\n"
3076 " ET._namespace_map[uri] = prefix\n"
3077 "cElementTree.register_namespace = register_namespace\n"
3078
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079 "cElementTree.dump = ET.dump\n"
3080 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3081 "cElementTree.iselement = ET.iselement\n"
3082 "cElementTree.QName = ET.QName\n"
3083 "cElementTree.tostring = ET.tostring\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003084 "cElementTree.fromstringlist = ET.fromstringlist\n"
3085 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003086 "cElementTree.VERSION = '" VERSION "'\n"
3087 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003088
3089 );
3090
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003091 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3092 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003093
3094 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3095
3096 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3097 if (elementtree_copyelement_obj) {
3098 /* reduce hack needed; enable reduce method */
3099 PyMethodDef* mp;
3100 for (mp = element_methods; mp->ml_name; mp++)
3101 if (mp->ml_meth == (PyCFunction) element_reduce) {
3102 mp->ml_name = "__reduce__";
3103 break;
3104 }
3105 } else
3106 PyErr_Clear();
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003107
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003108 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003109 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3110 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003111
3112#if defined(USE_PYEXPAT_CAPI)
3113 /* link against pyexpat, if possible */
Larry Hastings402b73f2010-03-25 00:54:54 +00003114 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003115 if (expat_capi) {
3116 /* check that it's usable */
3117 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3118 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3119 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3120 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3121 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3122 expat_capi = NULL;
3123 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003124#endif
3125
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003126 elementtree_parseerror_obj = PyErr_NewException(
3127 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3128 );
3129 Py_INCREF(elementtree_parseerror_obj);
3130 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003131}