blob: 348fefcf525ea357b0d919449bfb99211fb17ec6 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000052#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000053
54/* -------------------------------------------------------------------- */
55/* configuration */
56
57/* Leave defined to include the expat-based XMLParser type */
58#define USE_EXPAT
59
Florent Xiclunaf15351d2010-03-13 23:24:31 +000060/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* #define USE_PYEXPAT_CAPI */
62
63/* An element can hold this many children without extra memory
64 allocations. */
65#define STATIC_CHILDREN 4
66
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010073 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000074 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
/* optional allocation tracing: change the condition to 1 to keep a
   running byte total and print it on every ALLOC/RELEASE */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
/* tracing disabled: both macros expand to nothing */
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) declares a file-local helper returning
   `type`; MSVC builds additionally request inlining and __fastcall */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
96
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000097/* macros used to store 'join' flags in string object pointers. note
98 that all use of text and tail as object pointers must be wrapped in
99 JOIN_OBJ. see comments in the ElementObject definition for more
100 info. */
/* JOIN_OBJ: strip the flag bit and yield the real object pointer */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
/* JOIN_GET: read the flag stored in the lowest pointer bit (0 or 1) */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
/* JOIN_SET: combine the untagged pointer with the given flag value */
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
104
105/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000106static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000107static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000108static PyObject* elementtree_iter_obj;
109static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000110static PyObject* elementpath_obj;
111
112/* helpers */
113
114LOCAL(PyObject*)
115deepcopy(PyObject* object, PyObject* memo)
116{
117 /* do a deep copy of the given object */
118
119 PyObject* args;
120 PyObject* result;
121
122 if (!elementtree_deepcopy_obj) {
123 PyErr_SetString(
124 PyExc_RuntimeError,
125 "deepcopy helper not found"
126 );
127 return NULL;
128 }
129
130 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000131 if (!args)
132 return NULL;
133
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000134 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
135 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
136
137 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
138
139 Py_DECREF(args);
140
141 return result;
142}
143
144LOCAL(PyObject*)
145list_join(PyObject* list)
146{
147 /* join list elements (destroying the list in the process) */
148
149 PyObject* joiner;
150 PyObject* function;
151 PyObject* args;
152 PyObject* result;
153
154 switch (PyList_GET_SIZE(list)) {
155 case 0:
156 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000157 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000158 case 1:
159 result = PyList_GET_ITEM(list, 0);
160 Py_INCREF(result);
161 Py_DECREF(list);
162 return result;
163 }
164
165 /* two or more elements: slice out a suitable separator from the
166 first member, and use that to join the entire list */
167
168 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
169 if (!joiner)
170 return NULL;
171
172 function = PyObject_GetAttrString(joiner, "join");
173 if (!function) {
174 Py_DECREF(joiner);
175 return NULL;
176 }
177
178 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000179 if (!args)
180 return NULL;
181
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000182 PyTuple_SET_ITEM(args, 0, list);
183
184 result = PyObject_CallObject(function, args);
185
186 Py_DECREF(args); /* also removes list */
187 Py_DECREF(function);
188 Py_DECREF(joiner);
189
190 return result;
191}
192
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000193/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200194/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000195
196typedef struct {
197
198 /* attributes (a dictionary object), or None if no attributes */
199 PyObject* attrib;
200
201 /* child elements */
202 int length; /* actual number of items */
203 int allocated; /* allocated items */
204
205 /* this either points to _children or to a malloced buffer */
206 PyObject* *children;
207
208 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100209
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210} ElementObjectExtra;
211
212typedef struct {
213 PyObject_HEAD
214
215 /* element tag (a string). */
216 PyObject* tag;
217
218 /* text before first child. note that this is a tagged pointer;
219 use JOIN_OBJ to get the object pointer. the join flag is used
220 to distinguish lists created by the tree builder from lists
221 assigned to the attribute by application code; the former
222 should be joined before being returned to the user, the latter
223 should be left intact. */
224 PyObject* text;
225
226 /* text after this element, in parent. note that this is a tagged
227 pointer; use JOIN_OBJ to get the object pointer. */
228 PyObject* tail;
229
230 ElementObjectExtra* extra;
231
232} ElementObject;
233
Neal Norwitz227b5332006-03-22 09:28:35 +0000234static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235
Christian Heimes90aa7642007-12-19 02:45:37 +0000236#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000237
238/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200239/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000240
241LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200242create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000243{
244 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
245 if (!self->extra)
246 return -1;
247
248 if (!attrib)
249 attrib = Py_None;
250
251 Py_INCREF(attrib);
252 self->extra->attrib = attrib;
253
254 self->extra->length = 0;
255 self->extra->allocated = STATIC_CHILDREN;
256 self->extra->children = self->extra->_children;
257
258 return 0;
259}
260
261LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200262dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000263{
264 int i;
265
266 Py_DECREF(self->extra->attrib);
267
268 for (i = 0; i < self->extra->length; i++)
269 Py_DECREF(self->extra->children[i]);
270
271 if (self->extra->children != self->extra->_children)
272 PyObject_Free(self->extra->children);
273
274 PyObject_Free(self->extra);
275}
276
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277/* Convenience internal function to create new Element objects with the given
278 * tag and attributes.
279*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200281create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282{
283 ElementObject* self;
284
285 self = PyObject_New(ElementObject, &Element_Type);
286 if (self == NULL)
287 return NULL;
288
289 /* use None for empty dictionaries */
290 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
291 attrib = Py_None;
292
293 self->extra = NULL;
294
295 if (attrib != Py_None) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200296 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000297 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000298 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000299 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000300 }
301
302 Py_INCREF(tag);
303 self->tag = tag;
304
305 Py_INCREF(Py_None);
306 self->text = Py_None;
307
308 Py_INCREF(Py_None);
309 self->tail = Py_None;
310
311 ALLOC(sizeof(ElementObject), "create element");
312
313 return (PyObject*) self;
314}
315
Eli Bendersky092af1f2012-03-04 07:14:03 +0200316static PyObject *
317element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
318{
319 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
320 if (e != NULL) {
321 Py_INCREF(Py_None);
322 e->tag = Py_None;
323
324 Py_INCREF(Py_None);
325 e->text = Py_None;
326
327 Py_INCREF(Py_None);
328 e->tail = Py_None;
329
330 e->extra = NULL;
331 }
332 return (PyObject *)e;
333}
334
335static int
336element_init(PyObject *self, PyObject *args, PyObject *kwds)
337{
338 PyObject *tag;
339 PyObject *tmp;
340 PyObject *attrib = NULL;
341 ElementObject *self_elem;
342
343 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
344 return -1;
345
346 if (attrib || kwds) {
347 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
348 if (!attrib)
349 return -1;
350 if (kwds)
351 PyDict_Update(attrib, kwds);
352 } else {
353 Py_INCREF(Py_None);
354 attrib = Py_None;
355 }
356
357 self_elem = (ElementObject *)self;
358
359 /* Use None for empty dictionaries */
360 if (PyDict_CheckExact(attrib) && PyDict_Size(attrib) == 0) {
361 Py_INCREF(Py_None);
362 attrib = Py_None;
363 }
364
365 if (attrib != Py_None) {
366 if (create_extra(self_elem, attrib) < 0) {
367 PyObject_Del(self_elem);
368 return -1;
369 }
370 }
371
372 /* If create_extra needed attrib, it took a reference to it, so we can
373 * release ours anyway.
374 */
375 Py_DECREF(attrib);
376
377 /* Replace the objects already pointed to by tag, text and tail. */
378 tmp = self_elem->tag;
379 self_elem->tag = tag;
380 Py_INCREF(tag);
381 Py_DECREF(tmp);
382
383 tmp = self_elem->text;
384 self_elem->text = Py_None;
385 Py_INCREF(Py_None);
386 Py_DECREF(JOIN_OBJ(tmp));
387
388 tmp = self_elem->tail;
389 self_elem->tail = Py_None;
390 Py_INCREF(Py_None);
391 Py_DECREF(JOIN_OBJ(tmp));
392
393 return 0;
394}
395
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000396LOCAL(int)
397element_resize(ElementObject* self, int extra)
398{
399 int size;
400 PyObject* *children;
401
402 /* make sure self->children can hold the given number of extra
403 elements. set an exception and return -1 if allocation failed */
404
405 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000407
408 size = self->extra->length + extra;
409
410 if (size > self->extra->allocated) {
411 /* use Python 2.4's list growth strategy */
412 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000413 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100414 * which needs at least 4 bytes.
415 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000416 * be safe.
417 */
418 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000420 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100421 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000422 * false alarm always assume at least one child to be safe.
423 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000424 children = PyObject_Realloc(self->extra->children,
425 size * sizeof(PyObject*));
426 if (!children)
427 goto nomemory;
428 } else {
429 children = PyObject_Malloc(size * sizeof(PyObject*));
430 if (!children)
431 goto nomemory;
432 /* copy existing children from static area to malloc buffer */
433 memcpy(children, self->extra->children,
434 self->extra->length * sizeof(PyObject*));
435 }
436 self->extra->children = children;
437 self->extra->allocated = size;
438 }
439
440 return 0;
441
442 nomemory:
443 PyErr_NoMemory();
444 return -1;
445}
446
447LOCAL(int)
448element_add_subelement(ElementObject* self, PyObject* element)
449{
450 /* add a child element to a parent */
451
452 if (element_resize(self, 1) < 0)
453 return -1;
454
455 Py_INCREF(element);
456 self->extra->children[self->extra->length] = element;
457
458 self->extra->length++;
459
460 return 0;
461}
462
463LOCAL(PyObject*)
464element_get_attrib(ElementObject* self)
465{
466 /* return borrowed reference to attrib dictionary */
467 /* note: this function assumes that the extra section exists */
468
469 PyObject* res = self->extra->attrib;
470
471 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000472 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 /* create missing dictionary */
474 res = PyDict_New();
475 if (!res)
476 return NULL;
477 self->extra->attrib = res;
478 }
479
480 return res;
481}
482
483LOCAL(PyObject*)
484element_get_text(ElementObject* self)
485{
486 /* return borrowed reference to text attribute */
487
488 PyObject* res = self->text;
489
490 if (JOIN_GET(res)) {
491 res = JOIN_OBJ(res);
492 if (PyList_CheckExact(res)) {
493 res = list_join(res);
494 if (!res)
495 return NULL;
496 self->text = res;
497 }
498 }
499
500 return res;
501}
502
503LOCAL(PyObject*)
504element_get_tail(ElementObject* self)
505{
506 /* return borrowed reference to text attribute */
507
508 PyObject* res = self->tail;
509
510 if (JOIN_GET(res)) {
511 res = JOIN_OBJ(res);
512 if (PyList_CheckExact(res)) {
513 res = list_join(res);
514 if (!res)
515 return NULL;
516 self->tail = res;
517 }
518 }
519
520 return res;
521}
522
523static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000524subelement(PyObject* self, PyObject* args, PyObject* kw)
525{
526 PyObject* elem;
527
528 ElementObject* parent;
529 PyObject* tag;
530 PyObject* attrib = NULL;
531 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
532 &Element_Type, &parent, &tag,
533 &PyDict_Type, &attrib))
534 return NULL;
535
536 if (attrib || kw) {
537 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
538 if (!attrib)
539 return NULL;
540 if (kw)
541 PyDict_Update(attrib, kw);
542 } else {
543 Py_INCREF(Py_None);
544 attrib = Py_None;
545 }
546
Eli Bendersky092af1f2012-03-04 07:14:03 +0200547 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000548
549 Py_DECREF(attrib);
550
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000551 if (element_add_subelement(parent, elem) < 0) {
552 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000553 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000554 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000555
556 return elem;
557}
558
559static void
560element_dealloc(ElementObject* self)
561{
562 if (self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200563 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000564
565 /* discard attributes */
566 Py_DECREF(self->tag);
567 Py_DECREF(JOIN_OBJ(self->text));
568 Py_DECREF(JOIN_OBJ(self->tail));
569
570 RELEASE(sizeof(ElementObject), "destroy element");
571
Eli Bendersky092af1f2012-03-04 07:14:03 +0200572 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573}
574
575/* -------------------------------------------------------------------- */
576/* methods (in alphabetical order) */
577
578static PyObject*
579element_append(ElementObject* self, PyObject* args)
580{
581 PyObject* element;
582 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
583 return NULL;
584
585 if (element_add_subelement(self, element) < 0)
586 return NULL;
587
588 Py_RETURN_NONE;
589}
590
591static PyObject*
592element_clear(ElementObject* self, PyObject* args)
593{
594 if (!PyArg_ParseTuple(args, ":clear"))
595 return NULL;
596
597 if (self->extra) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200598 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599 self->extra = NULL;
600 }
601
602 Py_INCREF(Py_None);
603 Py_DECREF(JOIN_OBJ(self->text));
604 self->text = Py_None;
605
606 Py_INCREF(Py_None);
607 Py_DECREF(JOIN_OBJ(self->tail));
608 self->tail = Py_None;
609
610 Py_RETURN_NONE;
611}
612
613static PyObject*
614element_copy(ElementObject* self, PyObject* args)
615{
616 int i;
617 ElementObject* element;
618
619 if (!PyArg_ParseTuple(args, ":__copy__"))
620 return NULL;
621
Eli Bendersky092af1f2012-03-04 07:14:03 +0200622 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000623 self->tag, (self->extra) ? self->extra->attrib : Py_None
624 );
625 if (!element)
626 return NULL;
627
628 Py_DECREF(JOIN_OBJ(element->text));
629 element->text = self->text;
630 Py_INCREF(JOIN_OBJ(element->text));
631
632 Py_DECREF(JOIN_OBJ(element->tail));
633 element->tail = self->tail;
634 Py_INCREF(JOIN_OBJ(element->tail));
635
636 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100637
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000638 if (element_resize(element, self->extra->length) < 0) {
639 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000640 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000641 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000642
643 for (i = 0; i < self->extra->length; i++) {
644 Py_INCREF(self->extra->children[i]);
645 element->extra->children[i] = self->extra->children[i];
646 }
647
648 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100649
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000650 }
651
652 return (PyObject*) element;
653}
654
655static PyObject*
656element_deepcopy(ElementObject* self, PyObject* args)
657{
658 int i;
659 ElementObject* element;
660 PyObject* tag;
661 PyObject* attrib;
662 PyObject* text;
663 PyObject* tail;
664 PyObject* id;
665
666 PyObject* memo;
667 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
668 return NULL;
669
670 tag = deepcopy(self->tag, memo);
671 if (!tag)
672 return NULL;
673
674 if (self->extra) {
675 attrib = deepcopy(self->extra->attrib, memo);
676 if (!attrib) {
677 Py_DECREF(tag);
678 return NULL;
679 }
680 } else {
681 Py_INCREF(Py_None);
682 attrib = Py_None;
683 }
684
Eli Bendersky092af1f2012-03-04 07:14:03 +0200685 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000686
687 Py_DECREF(tag);
688 Py_DECREF(attrib);
689
690 if (!element)
691 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100692
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693 text = deepcopy(JOIN_OBJ(self->text), memo);
694 if (!text)
695 goto error;
696 Py_DECREF(element->text);
697 element->text = JOIN_SET(text, JOIN_GET(self->text));
698
699 tail = deepcopy(JOIN_OBJ(self->tail), memo);
700 if (!tail)
701 goto error;
702 Py_DECREF(element->tail);
703 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
704
705 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100706
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000707 if (element_resize(element, self->extra->length) < 0)
708 goto error;
709
710 for (i = 0; i < self->extra->length; i++) {
711 PyObject* child = deepcopy(self->extra->children[i], memo);
712 if (!child) {
713 element->extra->length = i;
714 goto error;
715 }
716 element->extra->children[i] = child;
717 }
718
719 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100720
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000721 }
722
723 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000724 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000725 if (!id)
726 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000727
728 i = PyDict_SetItem(memo, id, (PyObject*) element);
729
730 Py_DECREF(id);
731
732 if (i < 0)
733 goto error;
734
735 return (PyObject*) element;
736
737 error:
738 Py_DECREF(element);
739 return NULL;
740}
741
742LOCAL(int)
743checkpath(PyObject* tag)
744{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000745 Py_ssize_t i;
746 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000747
748 /* check if a tag contains an xpath character */
749
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000750#define PATHCHAR(ch) \
751 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200754 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
755 void *data = PyUnicode_DATA(tag);
756 unsigned int kind = PyUnicode_KIND(tag);
757 for (i = 0; i < len; i++) {
758 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
759 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000760 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200761 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200763 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 return 1;
765 }
766 return 0;
767 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000768 if (PyBytes_Check(tag)) {
769 char *p = PyBytes_AS_STRING(tag);
770 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 if (p[i] == '{')
772 check = 0;
773 else if (p[i] == '}')
774 check = 1;
775 else if (check && PATHCHAR(p[i]))
776 return 1;
777 }
778 return 0;
779 }
780
781 return 1; /* unknown type; might be path expression */
782}
783
784static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000785element_extend(ElementObject* self, PyObject* args)
786{
787 PyObject* seq;
788 Py_ssize_t i, seqlen = 0;
789
790 PyObject* seq_in;
791 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
792 return NULL;
793
794 seq = PySequence_Fast(seq_in, "");
795 if (!seq) {
796 PyErr_Format(
797 PyExc_TypeError,
798 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
799 );
800 return NULL;
801 }
802
803 seqlen = PySequence_Size(seq);
804 for (i = 0; i < seqlen; i++) {
805 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
806 if (element_add_subelement(self, element) < 0) {
807 Py_DECREF(seq);
808 return NULL;
809 }
810 }
811
812 Py_DECREF(seq);
813
814 Py_RETURN_NONE;
815}
816
817static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000818element_find(ElementObject* self, PyObject* args)
819{
820 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000821 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000822 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200823
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000824 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000825 return NULL;
826
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200827 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200828 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200829 return _PyObject_CallMethodId(
830 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000831 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200832 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000833
834 if (!self->extra)
835 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100836
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000837 for (i = 0; i < self->extra->length; i++) {
838 PyObject* item = self->extra->children[i];
839 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000840 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000841 Py_INCREF(item);
842 return item;
843 }
844 }
845
846 Py_RETURN_NONE;
847}
848
849static PyObject*
850element_findtext(ElementObject* self, PyObject* args)
851{
852 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000853 PyObject* tag;
854 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000855 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200856 _Py_IDENTIFIER(findtext);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200857
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000858 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000859 return NULL;
860
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000861 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200862 return _PyObject_CallMethodId(
863 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000864 );
865
866 if (!self->extra) {
867 Py_INCREF(default_value);
868 return default_value;
869 }
870
871 for (i = 0; i < self->extra->length; i++) {
872 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000873 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
874
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000875 PyObject* text = element_get_text(item);
876 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000877 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000878 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000879 return text;
880 }
881 }
882
883 Py_INCREF(default_value);
884 return default_value;
885}
886
887static PyObject*
888element_findall(ElementObject* self, PyObject* args)
889{
890 int i;
891 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000892 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000893 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200894
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000895 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000896 return NULL;
897
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200898 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200899 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200900 return _PyObject_CallMethodId(
901 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000902 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200903 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000904
905 out = PyList_New(0);
906 if (!out)
907 return NULL;
908
909 if (!self->extra)
910 return out;
911
912 for (i = 0; i < self->extra->length; i++) {
913 PyObject* item = self->extra->children[i];
914 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000915 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000916 if (PyList_Append(out, item) < 0) {
917 Py_DECREF(out);
918 return NULL;
919 }
920 }
921 }
922
923 return out;
924}
925
926static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000927element_iterfind(ElementObject* self, PyObject* args)
928{
929 PyObject* tag;
930 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200931 _Py_IDENTIFIER(iterfind);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200932
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000933 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
934 return NULL;
935
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200936 return _PyObject_CallMethodId(
937 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000938 );
939}
940
941static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000942element_get(ElementObject* self, PyObject* args)
943{
944 PyObject* value;
945
946 PyObject* key;
947 PyObject* default_value = Py_None;
948 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
949 return NULL;
950
951 if (!self->extra || self->extra->attrib == Py_None)
952 value = default_value;
953 else {
954 value = PyDict_GetItem(self->extra->attrib, key);
955 if (!value)
956 value = default_value;
957 }
958
959 Py_INCREF(value);
960 return value;
961}
962
963static PyObject*
964element_getchildren(ElementObject* self, PyObject* args)
965{
966 int i;
967 PyObject* list;
968
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000969 /* FIXME: report as deprecated? */
970
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000971 if (!PyArg_ParseTuple(args, ":getchildren"))
972 return NULL;
973
974 if (!self->extra)
975 return PyList_New(0);
976
977 list = PyList_New(self->extra->length);
978 if (!list)
979 return NULL;
980
981 for (i = 0; i < self->extra->length; i++) {
982 PyObject* item = self->extra->children[i];
983 Py_INCREF(item);
984 PyList_SET_ITEM(list, i, item);
985 }
986
987 return list;
988}
989
990static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000991element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000992{
993 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100994
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000995 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000996 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000997 return NULL;
998
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000999 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001000 PyErr_SetString(
1001 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001002 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001003 );
1004 return NULL;
1005 }
1006
1007 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001008 if (!args)
1009 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001010
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001011 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1012 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1013
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001014 result = PyObject_CallObject(elementtree_iter_obj, args);
1015
1016 Py_DECREF(args);
1017
1018 return result;
1019}
1020
1021
1022static PyObject*
1023element_itertext(ElementObject* self, PyObject* args)
1024{
1025 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001026
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001027 if (!PyArg_ParseTuple(args, ":itertext"))
1028 return NULL;
1029
1030 if (!elementtree_itertext_obj) {
1031 PyErr_SetString(
1032 PyExc_RuntimeError,
1033 "itertext helper not found"
1034 );
1035 return NULL;
1036 }
1037
1038 args = PyTuple_New(1);
1039 if (!args)
1040 return NULL;
1041
1042 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1043
1044 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001045
1046 Py_DECREF(args);
1047
1048 return result;
1049}
1050
1051static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001052element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001053{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001054 ElementObject* self = (ElementObject*) self_;
1055
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001056 if (!self->extra || index < 0 || index >= self->extra->length) {
1057 PyErr_SetString(
1058 PyExc_IndexError,
1059 "child index out of range"
1060 );
1061 return NULL;
1062 }
1063
1064 Py_INCREF(self->extra->children[index]);
1065 return self->extra->children[index];
1066}
1067
1068static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001069element_insert(ElementObject* self, PyObject* args)
1070{
1071 int i;
1072
1073 int index;
1074 PyObject* element;
1075 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1076 &Element_Type, &element))
1077 return NULL;
1078
1079 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001080 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001082 if (index < 0) {
1083 index += self->extra->length;
1084 if (index < 0)
1085 index = 0;
1086 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001087 if (index > self->extra->length)
1088 index = self->extra->length;
1089
1090 if (element_resize(self, 1) < 0)
1091 return NULL;
1092
1093 for (i = self->extra->length; i > index; i--)
1094 self->extra->children[i] = self->extra->children[i-1];
1095
1096 Py_INCREF(element);
1097 self->extra->children[index] = element;
1098
1099 self->extra->length++;
1100
1101 Py_RETURN_NONE;
1102}
1103
1104static PyObject*
1105element_items(ElementObject* self, PyObject* args)
1106{
1107 if (!PyArg_ParseTuple(args, ":items"))
1108 return NULL;
1109
1110 if (!self->extra || self->extra->attrib == Py_None)
1111 return PyList_New(0);
1112
1113 return PyDict_Items(self->extra->attrib);
1114}
1115
1116static PyObject*
1117element_keys(ElementObject* self, PyObject* args)
1118{
1119 if (!PyArg_ParseTuple(args, ":keys"))
1120 return NULL;
1121
1122 if (!self->extra || self->extra->attrib == Py_None)
1123 return PyList_New(0);
1124
1125 return PyDict_Keys(self->extra->attrib);
1126}
1127
Martin v. Löwis18e16552006-02-15 17:27:45 +00001128static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001129element_length(ElementObject* self)
1130{
1131 if (!self->extra)
1132 return 0;
1133
1134 return self->extra->length;
1135}
1136
1137static PyObject*
1138element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1139{
1140 PyObject* elem;
1141
1142 PyObject* tag;
1143 PyObject* attrib;
1144 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1145 return NULL;
1146
1147 attrib = PyDict_Copy(attrib);
1148 if (!attrib)
1149 return NULL;
1150
Eli Bendersky092af1f2012-03-04 07:14:03 +02001151 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001152
1153 Py_DECREF(attrib);
1154
1155 return elem;
1156}
1157
1158static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001159element_remove(ElementObject* self, PyObject* args)
1160{
1161 int i;
1162
1163 PyObject* element;
1164 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1165 return NULL;
1166
1167 if (!self->extra) {
1168 /* element has no children, so raise exception */
1169 PyErr_SetString(
1170 PyExc_ValueError,
1171 "list.remove(x): x not in list"
1172 );
1173 return NULL;
1174 }
1175
1176 for (i = 0; i < self->extra->length; i++) {
1177 if (self->extra->children[i] == element)
1178 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001179 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180 break;
1181 }
1182
1183 if (i == self->extra->length) {
1184 /* element is not in children, so raise exception */
1185 PyErr_SetString(
1186 PyExc_ValueError,
1187 "list.remove(x): x not in list"
1188 );
1189 return NULL;
1190 }
1191
1192 Py_DECREF(self->extra->children[i]);
1193
1194 self->extra->length--;
1195
1196 for (; i < self->extra->length; i++)
1197 self->extra->children[i] = self->extra->children[i+1];
1198
1199 Py_RETURN_NONE;
1200}
1201
1202static PyObject*
1203element_repr(ElementObject* self)
1204{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001205 if (self->tag)
1206 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1207 else
1208 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001209}
1210
1211static PyObject*
1212element_set(ElementObject* self, PyObject* args)
1213{
1214 PyObject* attrib;
1215
1216 PyObject* key;
1217 PyObject* value;
1218 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1219 return NULL;
1220
1221 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001222 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001223
1224 attrib = element_get_attrib(self);
1225 if (!attrib)
1226 return NULL;
1227
1228 if (PyDict_SetItem(attrib, key, value) < 0)
1229 return NULL;
1230
1231 Py_RETURN_NONE;
1232}
1233
1234static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001235element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001236{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001237 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001238 int i;
1239 PyObject* old;
1240
1241 if (!self->extra || index < 0 || index >= self->extra->length) {
1242 PyErr_SetString(
1243 PyExc_IndexError,
1244 "child assignment index out of range");
1245 return -1;
1246 }
1247
1248 old = self->extra->children[index];
1249
1250 if (item) {
1251 Py_INCREF(item);
1252 self->extra->children[index] = item;
1253 } else {
1254 self->extra->length--;
1255 for (i = index; i < self->extra->length; i++)
1256 self->extra->children[i] = self->extra->children[i+1];
1257 }
1258
1259 Py_DECREF(old);
1260
1261 return 0;
1262}
1263
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001264static PyObject*
1265element_subscr(PyObject* self_, PyObject* item)
1266{
1267 ElementObject* self = (ElementObject*) self_;
1268
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001269 if (PyIndex_Check(item)) {
1270 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001271
1272 if (i == -1 && PyErr_Occurred()) {
1273 return NULL;
1274 }
1275 if (i < 0 && self->extra)
1276 i += self->extra->length;
1277 return element_getitem(self_, i);
1278 }
1279 else if (PySlice_Check(item)) {
1280 Py_ssize_t start, stop, step, slicelen, cur, i;
1281 PyObject* list;
1282
1283 if (!self->extra)
1284 return PyList_New(0);
1285
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001286 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001287 self->extra->length,
1288 &start, &stop, &step, &slicelen) < 0) {
1289 return NULL;
1290 }
1291
1292 if (slicelen <= 0)
1293 return PyList_New(0);
1294 else {
1295 list = PyList_New(slicelen);
1296 if (!list)
1297 return NULL;
1298
1299 for (cur = start, i = 0; i < slicelen;
1300 cur += step, i++) {
1301 PyObject* item = self->extra->children[cur];
1302 Py_INCREF(item);
1303 PyList_SET_ITEM(list, i, item);
1304 }
1305
1306 return list;
1307 }
1308 }
1309 else {
1310 PyErr_SetString(PyExc_TypeError,
1311 "element indices must be integers");
1312 return NULL;
1313 }
1314}
1315
1316static int
1317element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1318{
1319 ElementObject* self = (ElementObject*) self_;
1320
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001321 if (PyIndex_Check(item)) {
1322 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001323
1324 if (i == -1 && PyErr_Occurred()) {
1325 return -1;
1326 }
1327 if (i < 0 && self->extra)
1328 i += self->extra->length;
1329 return element_setitem(self_, i, value);
1330 }
1331 else if (PySlice_Check(item)) {
1332 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1333
1334 PyObject* recycle = NULL;
1335 PyObject* seq = NULL;
1336
1337 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001338 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001339
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001340 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001341 self->extra->length,
1342 &start, &stop, &step, &slicelen) < 0) {
1343 return -1;
1344 }
1345
1346 if (value == NULL)
1347 newlen = 0;
1348 else {
1349 seq = PySequence_Fast(value, "");
1350 if (!seq) {
1351 PyErr_Format(
1352 PyExc_TypeError,
1353 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1354 );
1355 return -1;
1356 }
1357 newlen = PySequence_Size(seq);
1358 }
1359
1360 if (step != 1 && newlen != slicelen)
1361 {
1362 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001363 "attempt to assign sequence of size %zd "
1364 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001365 newlen, slicelen
1366 );
1367 return -1;
1368 }
1369
1370
1371 /* Resize before creating the recycle bin, to prevent refleaks. */
1372 if (newlen > slicelen) {
1373 if (element_resize(self, newlen - slicelen) < 0) {
1374 if (seq) {
1375 Py_DECREF(seq);
1376 }
1377 return -1;
1378 }
1379 }
1380
1381 if (slicelen > 0) {
1382 /* to avoid recursive calls to this method (via decref), move
1383 old items to the recycle bin here, and get rid of them when
1384 we're done modifying the element */
1385 recycle = PyList_New(slicelen);
1386 if (!recycle) {
1387 if (seq) {
1388 Py_DECREF(seq);
1389 }
1390 return -1;
1391 }
1392 for (cur = start, i = 0; i < slicelen;
1393 cur += step, i++)
1394 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1395 }
1396
1397 if (newlen < slicelen) {
1398 /* delete slice */
1399 for (i = stop; i < self->extra->length; i++)
1400 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1401 } else if (newlen > slicelen) {
1402 /* insert slice */
1403 for (i = self->extra->length-1; i >= stop; i--)
1404 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1405 }
1406
1407 /* replace the slice */
1408 for (cur = start, i = 0; i < newlen;
1409 cur += step, i++) {
1410 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1411 Py_INCREF(element);
1412 self->extra->children[cur] = element;
1413 }
1414
1415 self->extra->length += newlen - slicelen;
1416
1417 if (seq) {
1418 Py_DECREF(seq);
1419 }
1420
1421 /* discard the recycle bin, and everything in it */
1422 Py_XDECREF(recycle);
1423
1424 return 0;
1425 }
1426 else {
1427 PyErr_SetString(PyExc_TypeError,
1428 "element indices must be integers");
1429 return -1;
1430 }
1431}
1432
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001433static PyMethodDef element_methods[] = {
1434
1435 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1436
1437 {"get", (PyCFunction) element_get, METH_VARARGS},
1438 {"set", (PyCFunction) element_set, METH_VARARGS},
1439
1440 {"find", (PyCFunction) element_find, METH_VARARGS},
1441 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1442 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1443
1444 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001445 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001446 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1447 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1448
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001449 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1450 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1451 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1452
1453 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001454 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1455
1456 {"items", (PyCFunction) element_items, METH_VARARGS},
1457 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1458
1459 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1460
1461 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1462 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1463
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001464 {NULL, NULL}
1465};
1466
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001467static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001468element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001469{
1470 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001471 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001472
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001473 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001474 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001475
Alexander Belopolskye239d232010-12-08 23:31:48 +00001476 if (name == NULL)
1477 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001478
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001479 /* handle common attributes first */
1480 if (strcmp(name, "tag") == 0) {
1481 res = self->tag;
1482 Py_INCREF(res);
1483 return res;
1484 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001485 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001486 Py_INCREF(res);
1487 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001488 }
1489
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001490 /* methods */
1491 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1492 if (res)
1493 return res;
1494
1495 /* less common attributes */
1496 if (strcmp(name, "tail") == 0) {
1497 PyErr_Clear();
1498 res = element_get_tail(self);
1499 } else if (strcmp(name, "attrib") == 0) {
1500 PyErr_Clear();
1501 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001502 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001503 res = element_get_attrib(self);
1504 }
1505
1506 if (!res)
1507 return NULL;
1508
1509 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001510 return res;
1511}
1512
1513static int
1514element_setattr(ElementObject* self, const char* name, PyObject* value)
1515{
1516 if (value == NULL) {
1517 PyErr_SetString(
1518 PyExc_AttributeError,
1519 "can't delete element attributes"
1520 );
1521 return -1;
1522 }
1523
1524 if (strcmp(name, "tag") == 0) {
1525 Py_DECREF(self->tag);
1526 self->tag = value;
1527 Py_INCREF(self->tag);
1528 } else if (strcmp(name, "text") == 0) {
1529 Py_DECREF(JOIN_OBJ(self->text));
1530 self->text = value;
1531 Py_INCREF(self->text);
1532 } else if (strcmp(name, "tail") == 0) {
1533 Py_DECREF(JOIN_OBJ(self->tail));
1534 self->tail = value;
1535 Py_INCREF(self->tail);
1536 } else if (strcmp(name, "attrib") == 0) {
1537 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001538 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001539 Py_DECREF(self->extra->attrib);
1540 self->extra->attrib = value;
1541 Py_INCREF(self->extra->attrib);
1542 } else {
1543 PyErr_SetString(PyExc_AttributeError, name);
1544 return -1;
1545 }
1546
1547 return 0;
1548}
1549
1550static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001551 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001552 0, /* sq_concat */
1553 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001554 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001555 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001556 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001557 0,
1558};
1559
1560static PyMappingMethods element_as_mapping = {
1561 (lenfunc) element_length,
1562 (binaryfunc) element_subscr,
1563 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001564};
1565
Neal Norwitz227b5332006-03-22 09:28:35 +00001566static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001567 PyVarObject_HEAD_INIT(NULL, 0)
1568 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001569 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001570 (destructor)element_dealloc, /* tp_dealloc */
1571 0, /* tp_print */
1572 0, /* tp_getattr */
1573 (setattrfunc)element_setattr, /* tp_setattr */
1574 0, /* tp_reserved */
1575 (reprfunc)element_repr, /* tp_repr */
1576 0, /* tp_as_number */
1577 &element_as_sequence, /* tp_as_sequence */
1578 &element_as_mapping, /* tp_as_mapping */
1579 0, /* tp_hash */
1580 0, /* tp_call */
1581 0, /* tp_str */
1582 (getattrofunc)element_getattro, /* tp_getattro */
1583 0, /* tp_setattro */
1584 0, /* tp_as_buffer */
1585 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1586 0, /* tp_doc */
1587 0, /* tp_traverse */
1588 0, /* tp_clear */
1589 0, /* tp_richcompare */
1590 0, /* tp_weaklistoffset */
1591 0, /* tp_iter */
1592 0, /* tp_iternext */
1593 element_methods, /* tp_methods */
1594 0, /* tp_members */
1595 0, /* tp_getset */
1596 0, /* tp_base */
1597 0, /* tp_dict */
1598 0, /* tp_descr_get */
1599 0, /* tp_descr_set */
1600 0, /* tp_dictoffset */
1601 (initproc)element_init, /* tp_init */
1602 PyType_GenericAlloc, /* tp_alloc */
1603 element_new, /* tp_new */
1604 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001605};
1606
1607/* ==================================================================== */
1608/* the tree builder type */
1609
1610typedef struct {
1611 PyObject_HEAD
1612
1613 PyObject* root; /* root node (first created node) */
1614
1615 ElementObject* this; /* current node */
1616 ElementObject* last; /* most recently created node */
1617
1618 PyObject* data; /* data collector (string or list), or NULL */
1619
1620 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001621 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001622
1623 /* element tracing */
1624 PyObject* events; /* list of events, or NULL if not collecting */
1625 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1626 PyObject* end_event_obj;
1627 PyObject* start_ns_event_obj;
1628 PyObject* end_ns_event_obj;
1629
1630} TreeBuilderObject;
1631
Neal Norwitz227b5332006-03-22 09:28:35 +00001632static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001633
Christian Heimes90aa7642007-12-19 02:45:37 +00001634#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001635
1636/* -------------------------------------------------------------------- */
1637/* constructor and destructor */
1638
1639LOCAL(PyObject*)
1640treebuilder_new(void)
1641{
1642 TreeBuilderObject* self;
1643
1644 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1645 if (self == NULL)
1646 return NULL;
1647
1648 self->root = NULL;
1649
1650 Py_INCREF(Py_None);
1651 self->this = (ElementObject*) Py_None;
1652
1653 Py_INCREF(Py_None);
1654 self->last = (ElementObject*) Py_None;
1655
1656 self->data = NULL;
1657
1658 self->stack = PyList_New(20);
1659 self->index = 0;
1660
1661 self->events = NULL;
1662 self->start_event_obj = self->end_event_obj = NULL;
1663 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1664
1665 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1666
1667 return (PyObject*) self;
1668}
1669
1670static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001671treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001672{
1673 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1674 return NULL;
1675
1676 return treebuilder_new();
1677}
1678
1679static void
1680treebuilder_dealloc(TreeBuilderObject* self)
1681{
1682 Py_XDECREF(self->end_ns_event_obj);
1683 Py_XDECREF(self->start_ns_event_obj);
1684 Py_XDECREF(self->end_event_obj);
1685 Py_XDECREF(self->start_event_obj);
1686 Py_XDECREF(self->events);
1687 Py_DECREF(self->stack);
1688 Py_XDECREF(self->data);
1689 Py_DECREF(self->last);
1690 Py_DECREF(self->this);
1691 Py_XDECREF(self->root);
1692
1693 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1694
1695 PyObject_Del(self);
1696}
1697
1698/* -------------------------------------------------------------------- */
1699/* handlers */
1700
1701LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001702treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1703 PyObject* attrib)
1704{
1705 PyObject* node;
1706 PyObject* this;
1707
1708 if (self->data) {
1709 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001710 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001711 self->last->text = JOIN_SET(
1712 self->data, PyList_CheckExact(self->data)
1713 );
1714 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001715 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001716 self->last->tail = JOIN_SET(
1717 self->data, PyList_CheckExact(self->data)
1718 );
1719 }
1720 self->data = NULL;
1721 }
1722
Eli Bendersky092af1f2012-03-04 07:14:03 +02001723 node = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001724 if (!node)
1725 return NULL;
1726
1727 this = (PyObject*) self->this;
1728
1729 if (this != Py_None) {
1730 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001731 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001732 } else {
1733 if (self->root) {
1734 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001735 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001736 "multiple elements on top level"
1737 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001738 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001739 }
1740 Py_INCREF(node);
1741 self->root = node;
1742 }
1743
1744 if (self->index < PyList_GET_SIZE(self->stack)) {
1745 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001746 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001747 Py_INCREF(this);
1748 } else {
1749 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001750 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001751 }
1752 self->index++;
1753
1754 Py_DECREF(this);
1755 Py_INCREF(node);
1756 self->this = (ElementObject*) node;
1757
1758 Py_DECREF(self->last);
1759 Py_INCREF(node);
1760 self->last = (ElementObject*) node;
1761
1762 if (self->start_event_obj) {
1763 PyObject* res;
1764 PyObject* action = self->start_event_obj;
1765 res = PyTuple_New(2);
1766 if (res) {
1767 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1768 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1769 PyList_Append(self->events, res);
1770 Py_DECREF(res);
1771 } else
1772 PyErr_Clear(); /* FIXME: propagate error */
1773 }
1774
1775 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001776
1777 error:
1778 Py_DECREF(node);
1779 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001780}
1781
1782LOCAL(PyObject*)
1783treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1784{
1785 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001786 if (self->last == (ElementObject*) Py_None) {
1787 /* ignore calls to data before the first call to start */
1788 Py_RETURN_NONE;
1789 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001790 /* store the first item as is */
1791 Py_INCREF(data); self->data = data;
1792 } else {
1793 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001794 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1795 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001796 /* expat often generates single character data sections; handle
1797 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001798 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1799 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001800 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001801 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001802 } else if (PyList_CheckExact(self->data)) {
1803 if (PyList_Append(self->data, data) < 0)
1804 return NULL;
1805 } else {
1806 PyObject* list = PyList_New(2);
1807 if (!list)
1808 return NULL;
1809 PyList_SET_ITEM(list, 0, self->data);
1810 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1811 self->data = list;
1812 }
1813 }
1814
1815 Py_RETURN_NONE;
1816}
1817
1818LOCAL(PyObject*)
1819treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1820{
1821 PyObject* item;
1822
1823 if (self->data) {
1824 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001825 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001826 self->last->text = JOIN_SET(
1827 self->data, PyList_CheckExact(self->data)
1828 );
1829 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001830 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001831 self->last->tail = JOIN_SET(
1832 self->data, PyList_CheckExact(self->data)
1833 );
1834 }
1835 self->data = NULL;
1836 }
1837
1838 if (self->index == 0) {
1839 PyErr_SetString(
1840 PyExc_IndexError,
1841 "pop from empty stack"
1842 );
1843 return NULL;
1844 }
1845
1846 self->index--;
1847
1848 item = PyList_GET_ITEM(self->stack, self->index);
1849 Py_INCREF(item);
1850
1851 Py_DECREF(self->last);
1852
1853 self->last = (ElementObject*) self->this;
1854 self->this = (ElementObject*) item;
1855
1856 if (self->end_event_obj) {
1857 PyObject* res;
1858 PyObject* action = self->end_event_obj;
1859 PyObject* node = (PyObject*) self->last;
1860 res = PyTuple_New(2);
1861 if (res) {
1862 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1863 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1864 PyList_Append(self->events, res);
1865 Py_DECREF(res);
1866 } else
1867 PyErr_Clear(); /* FIXME: propagate error */
1868 }
1869
1870 Py_INCREF(self->last);
1871 return (PyObject*) self->last;
1872}
1873
1874LOCAL(void)
1875treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001876 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001877{
1878 PyObject* res;
1879 PyObject* action;
1880 PyObject* parcel;
1881
1882 if (!self->events)
1883 return;
1884
1885 if (start) {
1886 if (!self->start_ns_event_obj)
1887 return;
1888 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001889 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001890 if (!parcel)
1891 return;
1892 Py_INCREF(action);
1893 } else {
1894 if (!self->end_ns_event_obj)
1895 return;
1896 action = self->end_ns_event_obj;
1897 Py_INCREF(action);
1898 parcel = Py_None;
1899 Py_INCREF(parcel);
1900 }
1901
1902 res = PyTuple_New(2);
1903
1904 if (res) {
1905 PyTuple_SET_ITEM(res, 0, action);
1906 PyTuple_SET_ITEM(res, 1, parcel);
1907 PyList_Append(self->events, res);
1908 Py_DECREF(res);
1909 } else
1910 PyErr_Clear(); /* FIXME: propagate error */
1911}
1912
1913/* -------------------------------------------------------------------- */
1914/* methods (in alphabetical order) */
1915
1916static PyObject*
1917treebuilder_data(TreeBuilderObject* self, PyObject* args)
1918{
1919 PyObject* data;
1920 if (!PyArg_ParseTuple(args, "O:data", &data))
1921 return NULL;
1922
1923 return treebuilder_handle_data(self, data);
1924}
1925
1926static PyObject*
1927treebuilder_end(TreeBuilderObject* self, PyObject* args)
1928{
1929 PyObject* tag;
1930 if (!PyArg_ParseTuple(args, "O:end", &tag))
1931 return NULL;
1932
1933 return treebuilder_handle_end(self, tag);
1934}
1935
1936LOCAL(PyObject*)
1937treebuilder_done(TreeBuilderObject* self)
1938{
1939 PyObject* res;
1940
1941 /* FIXME: check stack size? */
1942
1943 if (self->root)
1944 res = self->root;
1945 else
1946 res = Py_None;
1947
1948 Py_INCREF(res);
1949 return res;
1950}
1951
1952static PyObject*
1953treebuilder_close(TreeBuilderObject* self, PyObject* args)
1954{
1955 if (!PyArg_ParseTuple(args, ":close"))
1956 return NULL;
1957
1958 return treebuilder_done(self);
1959}
1960
1961static PyObject*
1962treebuilder_start(TreeBuilderObject* self, PyObject* args)
1963{
1964 PyObject* tag;
1965 PyObject* attrib = Py_None;
1966 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1967 return NULL;
1968
1969 return treebuilder_handle_start(self, tag, attrib);
1970}
1971
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001972static PyMethodDef treebuilder_methods[] = {
1973 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1974 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1975 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001976 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1977 {NULL, NULL}
1978};
1979
Neal Norwitz227b5332006-03-22 09:28:35 +00001980static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001981 PyVarObject_HEAD_INIT(NULL, 0)
1982 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001983 /* methods */
1984 (destructor)treebuilder_dealloc, /* tp_dealloc */
1985 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001986 0, /* tp_getattr */
1987 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00001988 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001989 0, /* tp_repr */
1990 0, /* tp_as_number */
1991 0, /* tp_as_sequence */
1992 0, /* tp_as_mapping */
1993 0, /* tp_hash */
1994 0, /* tp_call */
1995 0, /* tp_str */
1996 0, /* tp_getattro */
1997 0, /* tp_setattro */
1998 0, /* tp_as_buffer */
1999 Py_TPFLAGS_DEFAULT, /* tp_flags */
2000 0, /* tp_doc */
2001 0, /* tp_traverse */
2002 0, /* tp_clear */
2003 0, /* tp_richcompare */
2004 0, /* tp_weaklistoffset */
2005 0, /* tp_iter */
2006 0, /* tp_iternext */
2007 treebuilder_methods, /* tp_methods */
2008 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002009};
2010
2011/* ==================================================================== */
2012/* the expat interface */
2013
2014#if defined(USE_EXPAT)
2015
2016#include "expat.h"
2017
2018#if defined(USE_PYEXPAT_CAPI)
2019#include "pyexpat.h"
2020static struct PyExpat_CAPI* expat_capi;
2021#define EXPAT(func) (expat_capi->func)
2022#else
2023#define EXPAT(func) (XML_##func)
2024#endif
2025
2026typedef struct {
2027 PyObject_HEAD
2028
2029 XML_Parser parser;
2030
2031 PyObject* target;
2032 PyObject* entity;
2033
2034 PyObject* names;
2035
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002036 PyObject* handle_start;
2037 PyObject* handle_data;
2038 PyObject* handle_end;
2039
2040 PyObject* handle_comment;
2041 PyObject* handle_pi;
2042
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002043 PyObject* handle_close;
2044
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002045} XMLParserObject;
2046
Neal Norwitz227b5332006-03-22 09:28:35 +00002047static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002048
2049/* helpers */
2050
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002051LOCAL(PyObject*)
2052makeuniversal(XMLParserObject* self, const char* string)
2053{
2054 /* convert a UTF-8 tag/attribute name from the expat parser
2055 to a universal name string */
2056
2057 int size = strlen(string);
2058 PyObject* key;
2059 PyObject* value;
2060
2061 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002062 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002063 if (!key)
2064 return NULL;
2065
2066 value = PyDict_GetItem(self->names, key);
2067
2068 if (value) {
2069 Py_INCREF(value);
2070 } else {
2071 /* new name. convert to universal name, and decode as
2072 necessary */
2073
2074 PyObject* tag;
2075 char* p;
2076 int i;
2077
2078 /* look for namespace separator */
2079 for (i = 0; i < size; i++)
2080 if (string[i] == '}')
2081 break;
2082 if (i != size) {
2083 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002084 tag = PyBytes_FromStringAndSize(NULL, size+1);
2085 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002086 p[0] = '{';
2087 memcpy(p+1, string, size);
2088 size++;
2089 } else {
2090 /* plain name; use key as tag */
2091 Py_INCREF(key);
2092 tag = key;
2093 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002094
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002095 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002096 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002097 value = PyUnicode_DecodeUTF8(p, size, "strict");
2098 Py_DECREF(tag);
2099 if (!value) {
2100 Py_DECREF(key);
2101 return NULL;
2102 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002103
2104 /* add to names dictionary */
2105 if (PyDict_SetItem(self->names, key, value) < 0) {
2106 Py_DECREF(key);
2107 Py_DECREF(value);
2108 return NULL;
2109 }
2110 }
2111
2112 Py_DECREF(key);
2113 return value;
2114}
2115
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002116static void
2117expat_set_error(const char* message, int line, int column)
2118{
Victor Stinner499dfcf2011-03-21 13:26:24 +01002119 PyObject *errmsg, *error, *position;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002120
Victor Stinner499dfcf2011-03-21 13:26:24 +01002121 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
2122 message, line, column);
2123 if (errmsg == NULL)
2124 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002125
Victor Stinner499dfcf2011-03-21 13:26:24 +01002126 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2127 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002128 if (!error)
2129 return;
2130
2131 /* add position attribute */
2132 position = Py_BuildValue("(ii)", line, column);
2133 if (!position) {
2134 Py_DECREF(error);
2135 return;
2136 }
2137 if (PyObject_SetAttrString(error, "position", position) == -1) {
2138 Py_DECREF(error);
2139 Py_DECREF(position);
2140 return;
2141 }
2142 Py_DECREF(position);
2143
2144 PyErr_SetObject(elementtree_parseerror_obj, error);
2145 Py_DECREF(error);
2146}
2147
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002148/* -------------------------------------------------------------------- */
2149/* handlers */
2150
2151static void
2152expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2153 int data_len)
2154{
2155 PyObject* key;
2156 PyObject* value;
2157 PyObject* res;
2158
2159 if (data_len < 2 || data_in[0] != '&')
2160 return;
2161
Neal Norwitz0269b912007-08-08 06:56:02 +00002162 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002163 if (!key)
2164 return;
2165
2166 value = PyDict_GetItem(self->entity, key);
2167
2168 if (value) {
2169 if (TreeBuilder_CheckExact(self->target))
2170 res = treebuilder_handle_data(
2171 (TreeBuilderObject*) self->target, value
2172 );
2173 else if (self->handle_data)
2174 res = PyObject_CallFunction(self->handle_data, "O", value);
2175 else
2176 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002177 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002178 } else if (!PyErr_Occurred()) {
2179 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002180 char message[128] = "undefined entity ";
2181 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002182 expat_set_error(
2183 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002184 EXPAT(GetErrorLineNumber)(self->parser),
2185 EXPAT(GetErrorColumnNumber)(self->parser)
2186 );
2187 }
2188
2189 Py_DECREF(key);
2190}
2191
2192static void
2193expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2194 const XML_Char **attrib_in)
2195{
2196 PyObject* res;
2197 PyObject* tag;
2198 PyObject* attrib;
2199 int ok;
2200
2201 /* tag name */
2202 tag = makeuniversal(self, tag_in);
2203 if (!tag)
2204 return; /* parser will look for errors */
2205
2206 /* attributes */
2207 if (attrib_in[0]) {
2208 attrib = PyDict_New();
2209 if (!attrib)
2210 return;
2211 while (attrib_in[0] && attrib_in[1]) {
2212 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002213 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002214 if (!key || !value) {
2215 Py_XDECREF(value);
2216 Py_XDECREF(key);
2217 Py_DECREF(attrib);
2218 return;
2219 }
2220 ok = PyDict_SetItem(attrib, key, value);
2221 Py_DECREF(value);
2222 Py_DECREF(key);
2223 if (ok < 0) {
2224 Py_DECREF(attrib);
2225 return;
2226 }
2227 attrib_in += 2;
2228 }
2229 } else {
2230 Py_INCREF(Py_None);
2231 attrib = Py_None;
2232 }
2233
2234 if (TreeBuilder_CheckExact(self->target))
2235 /* shortcut */
2236 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2237 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002238 else if (self->handle_start) {
2239 if (attrib == Py_None) {
2240 Py_DECREF(attrib);
2241 attrib = PyDict_New();
2242 if (!attrib)
2243 return;
2244 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002245 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002246 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002247 res = NULL;
2248
2249 Py_DECREF(tag);
2250 Py_DECREF(attrib);
2251
2252 Py_XDECREF(res);
2253}
2254
2255static void
2256expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2257 int data_len)
2258{
2259 PyObject* data;
2260 PyObject* res;
2261
Neal Norwitz0269b912007-08-08 06:56:02 +00002262 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002263 if (!data)
2264 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002265
2266 if (TreeBuilder_CheckExact(self->target))
2267 /* shortcut */
2268 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2269 else if (self->handle_data)
2270 res = PyObject_CallFunction(self->handle_data, "O", data);
2271 else
2272 res = NULL;
2273
2274 Py_DECREF(data);
2275
2276 Py_XDECREF(res);
2277}
2278
2279static void
2280expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2281{
2282 PyObject* tag;
2283 PyObject* res = NULL;
2284
2285 if (TreeBuilder_CheckExact(self->target))
2286 /* shortcut */
2287 /* the standard tree builder doesn't look at the end tag */
2288 res = treebuilder_handle_end(
2289 (TreeBuilderObject*) self->target, Py_None
2290 );
2291 else if (self->handle_end) {
2292 tag = makeuniversal(self, tag_in);
2293 if (tag) {
2294 res = PyObject_CallFunction(self->handle_end, "O", tag);
2295 Py_DECREF(tag);
2296 }
2297 }
2298
2299 Py_XDECREF(res);
2300}
2301
2302static void
2303expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2304 const XML_Char *uri)
2305{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002306 PyObject* sprefix = NULL;
2307 PyObject* suri = NULL;
2308
2309 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2310 if (!suri)
2311 return;
2312
2313 if (prefix)
2314 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2315 else
2316 sprefix = PyUnicode_FromString("");
2317 if (!sprefix) {
2318 Py_DECREF(suri);
2319 return;
2320 }
2321
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002322 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002323 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002324 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002325
2326 Py_DECREF(sprefix);
2327 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002328}
2329
2330static void
2331expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2332{
2333 treebuilder_handle_namespace(
2334 (TreeBuilderObject*) self->target, 0, NULL, NULL
2335 );
2336}
2337
2338static void
2339expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2340{
2341 PyObject* comment;
2342 PyObject* res;
2343
2344 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002345 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002346 if (comment) {
2347 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2348 Py_XDECREF(res);
2349 Py_DECREF(comment);
2350 }
2351 }
2352}
2353
2354static void
2355expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2356 const XML_Char* data_in)
2357{
2358 PyObject* target;
2359 PyObject* data;
2360 PyObject* res;
2361
2362 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002363 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2364 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002365 if (target && data) {
2366 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2367 Py_XDECREF(res);
2368 Py_DECREF(data);
2369 Py_DECREF(target);
2370 } else {
2371 Py_XDECREF(data);
2372 Py_XDECREF(target);
2373 }
2374 }
2375}
2376
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002377static int
2378expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2379 XML_Encoding *info)
2380{
2381 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002382 unsigned char s[256];
2383 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002384 void *data;
2385 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002386
2387 memset(info, 0, sizeof(XML_Encoding));
2388
2389 for (i = 0; i < 256; i++)
2390 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002391
Fredrik Lundhc3389992005-12-25 11:40:19 +00002392 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002393 if (!u)
2394 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002395 if (PyUnicode_READY(u))
2396 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002397
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002398 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002399 Py_DECREF(u);
2400 return XML_STATUS_ERROR;
2401 }
2402
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002403 kind = PyUnicode_KIND(u);
2404 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002405 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002406 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2407 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2408 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002409 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002410 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002411 }
2412
2413 Py_DECREF(u);
2414
2415 return XML_STATUS_OK;
2416}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002417
2418/* -------------------------------------------------------------------- */
2419/* constructor and destructor */
2420
2421static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002422xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002423{
2424 XMLParserObject* self;
2425 /* FIXME: does this need to be static? */
2426 static XML_Memory_Handling_Suite memory_handler;
2427
2428 PyObject* target = NULL;
2429 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002430 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002431 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2432 &target, &encoding))
2433 return NULL;
2434
2435#if defined(USE_PYEXPAT_CAPI)
2436 if (!expat_capi) {
2437 PyErr_SetString(
2438 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2439 );
2440 return NULL;
2441 }
2442#endif
2443
2444 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2445 if (self == NULL)
2446 return NULL;
2447
2448 self->entity = PyDict_New();
2449 if (!self->entity) {
2450 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002451 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002452 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002453
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002454 self->names = PyDict_New();
2455 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002456 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002457 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002458 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002459 }
2460
2461 memory_handler.malloc_fcn = PyObject_Malloc;
2462 memory_handler.realloc_fcn = PyObject_Realloc;
2463 memory_handler.free_fcn = PyObject_Free;
2464
2465 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2466 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002467 PyObject_Del(self->names);
2468 PyObject_Del(self->entity);
2469 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002470 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002471 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002472 }
2473
2474 /* setup target handlers */
2475 if (!target) {
2476 target = treebuilder_new();
2477 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002478 EXPAT(ParserFree)(self->parser);
2479 PyObject_Del(self->names);
2480 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002481 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002482 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002483 }
2484 } else
2485 Py_INCREF(target);
2486 self->target = target;
2487
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002488 self->handle_start = PyObject_GetAttrString(target, "start");
2489 self->handle_data = PyObject_GetAttrString(target, "data");
2490 self->handle_end = PyObject_GetAttrString(target, "end");
2491 self->handle_comment = PyObject_GetAttrString(target, "comment");
2492 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002493 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002494
2495 PyErr_Clear();
2496
2497 /* configure parser */
2498 EXPAT(SetUserData)(self->parser, self);
2499 EXPAT(SetElementHandler)(
2500 self->parser,
2501 (XML_StartElementHandler) expat_start_handler,
2502 (XML_EndElementHandler) expat_end_handler
2503 );
2504 EXPAT(SetDefaultHandlerExpand)(
2505 self->parser,
2506 (XML_DefaultHandler) expat_default_handler
2507 );
2508 EXPAT(SetCharacterDataHandler)(
2509 self->parser,
2510 (XML_CharacterDataHandler) expat_data_handler
2511 );
2512 if (self->handle_comment)
2513 EXPAT(SetCommentHandler)(
2514 self->parser,
2515 (XML_CommentHandler) expat_comment_handler
2516 );
2517 if (self->handle_pi)
2518 EXPAT(SetProcessingInstructionHandler)(
2519 self->parser,
2520 (XML_ProcessingInstructionHandler) expat_pi_handler
2521 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002522 EXPAT(SetUnknownEncodingHandler)(
2523 self->parser,
2524 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2525 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002526
2527 ALLOC(sizeof(XMLParserObject), "create expatparser");
2528
2529 return (PyObject*) self;
2530}
2531
2532static void
2533xmlparser_dealloc(XMLParserObject* self)
2534{
2535 EXPAT(ParserFree)(self->parser);
2536
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002537 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002538 Py_XDECREF(self->handle_pi);
2539 Py_XDECREF(self->handle_comment);
2540 Py_XDECREF(self->handle_end);
2541 Py_XDECREF(self->handle_data);
2542 Py_XDECREF(self->handle_start);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002543
2544 Py_DECREF(self->target);
2545 Py_DECREF(self->entity);
2546 Py_DECREF(self->names);
2547
2548 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2549
2550 PyObject_Del(self);
2551}
2552
2553/* -------------------------------------------------------------------- */
2554/* methods (in alphabetical order) */
2555
2556LOCAL(PyObject*)
2557expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2558{
2559 int ok;
2560
2561 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2562
2563 if (PyErr_Occurred())
2564 return NULL;
2565
2566 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002567 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002568 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2569 EXPAT(GetErrorLineNumber)(self->parser),
2570 EXPAT(GetErrorColumnNumber)(self->parser)
2571 );
2572 return NULL;
2573 }
2574
2575 Py_RETURN_NONE;
2576}
2577
2578static PyObject*
2579xmlparser_close(XMLParserObject* self, PyObject* args)
2580{
2581 /* end feeding data to parser */
2582
2583 PyObject* res;
2584 if (!PyArg_ParseTuple(args, ":close"))
2585 return NULL;
2586
2587 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002588 if (!res)
2589 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002590
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002591 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002592 Py_DECREF(res);
2593 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002594 } if (self->handle_close) {
2595 Py_DECREF(res);
2596 return PyObject_CallFunction(self->handle_close, "");
2597 } else
2598 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002599}
2600
2601static PyObject*
2602xmlparser_feed(XMLParserObject* self, PyObject* args)
2603{
2604 /* feed data to parser */
2605
2606 char* data;
2607 int data_len;
2608 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2609 return NULL;
2610
2611 return expat_parse(self, data, data_len, 0);
2612}
2613
2614static PyObject*
2615xmlparser_parse(XMLParserObject* self, PyObject* args)
2616{
2617 /* (internal) parse until end of input stream */
2618
2619 PyObject* reader;
2620 PyObject* buffer;
2621 PyObject* res;
2622
2623 PyObject* fileobj;
2624 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2625 return NULL;
2626
2627 reader = PyObject_GetAttrString(fileobj, "read");
2628 if (!reader)
2629 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002630
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002631 /* read from open file object */
2632 for (;;) {
2633
2634 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2635
2636 if (!buffer) {
2637 /* read failed (e.g. due to KeyboardInterrupt) */
2638 Py_DECREF(reader);
2639 return NULL;
2640 }
2641
Christian Heimes72b710a2008-05-26 13:28:38 +00002642 if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002643 Py_DECREF(buffer);
2644 break;
2645 }
2646
2647 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002648 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002649 );
2650
2651 Py_DECREF(buffer);
2652
2653 if (!res) {
2654 Py_DECREF(reader);
2655 return NULL;
2656 }
2657 Py_DECREF(res);
2658
2659 }
2660
2661 Py_DECREF(reader);
2662
2663 res = expat_parse(self, "", 0, 1);
2664
2665 if (res && TreeBuilder_CheckExact(self->target)) {
2666 Py_DECREF(res);
2667 return treebuilder_done((TreeBuilderObject*) self->target);
2668 }
2669
2670 return res;
2671}
2672
2673static PyObject*
2674xmlparser_setevents(XMLParserObject* self, PyObject* args)
2675{
2676 /* activate element event reporting */
2677
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002678 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002679 TreeBuilderObject* target;
2680
2681 PyObject* events; /* event collector */
2682 PyObject* event_set = Py_None;
2683 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2684 &event_set))
2685 return NULL;
2686
2687 if (!TreeBuilder_CheckExact(self->target)) {
2688 PyErr_SetString(
2689 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01002690 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002691 "targets"
2692 );
2693 return NULL;
2694 }
2695
2696 target = (TreeBuilderObject*) self->target;
2697
2698 Py_INCREF(events);
2699 Py_XDECREF(target->events);
2700 target->events = events;
2701
2702 /* clear out existing events */
2703 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2704 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2705 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2706 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2707
2708 if (event_set == Py_None) {
2709 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002710 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711 Py_RETURN_NONE;
2712 }
2713
2714 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2715 goto error;
2716
2717 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2718 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2719 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002720 if (PyUnicode_Check(item)) {
2721 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002722 if (event == NULL)
2723 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002724 } else if (PyBytes_Check(item))
2725 event = PyBytes_AS_STRING(item);
2726 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002727 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002728 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002729 if (strcmp(event, "start") == 0) {
2730 Py_INCREF(item);
2731 target->start_event_obj = item;
2732 } else if (strcmp(event, "end") == 0) {
2733 Py_INCREF(item);
2734 Py_XDECREF(target->end_event_obj);
2735 target->end_event_obj = item;
2736 } else if (strcmp(event, "start-ns") == 0) {
2737 Py_INCREF(item);
2738 Py_XDECREF(target->start_ns_event_obj);
2739 target->start_ns_event_obj = item;
2740 EXPAT(SetNamespaceDeclHandler)(
2741 self->parser,
2742 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2743 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2744 );
2745 } else if (strcmp(event, "end-ns") == 0) {
2746 Py_INCREF(item);
2747 Py_XDECREF(target->end_ns_event_obj);
2748 target->end_ns_event_obj = item;
2749 EXPAT(SetNamespaceDeclHandler)(
2750 self->parser,
2751 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2752 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2753 );
2754 } else {
2755 PyErr_Format(
2756 PyExc_ValueError,
2757 "unknown event '%s'", event
2758 );
2759 return NULL;
2760 }
2761 }
2762
2763 Py_RETURN_NONE;
2764
2765 error:
2766 PyErr_SetString(
2767 PyExc_TypeError,
2768 "invalid event tuple"
2769 );
2770 return NULL;
2771}
2772
2773static PyMethodDef xmlparser_methods[] = {
2774 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2775 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2776 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2777 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2778 {NULL, NULL}
2779};
2780
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002781static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002782xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002783{
Alexander Belopolskye239d232010-12-08 23:31:48 +00002784 if (PyUnicode_Check(nameobj)) {
2785 PyObject* res;
2786 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
2787 res = self->entity;
2788 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
2789 res = self->target;
2790 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
2791 return PyUnicode_FromFormat(
2792 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002793 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00002794 }
2795 else
2796 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002797
Alexander Belopolskye239d232010-12-08 23:31:48 +00002798 Py_INCREF(res);
2799 return res;
2800 }
2801 generic:
2802 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002803}
2804
Neal Norwitz227b5332006-03-22 09:28:35 +00002805static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002806 PyVarObject_HEAD_INIT(NULL, 0)
2807 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002808 /* methods */
2809 (destructor)xmlparser_dealloc, /* tp_dealloc */
2810 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002811 0, /* tp_getattr */
2812 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002813 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002814 0, /* tp_repr */
2815 0, /* tp_as_number */
2816 0, /* tp_as_sequence */
2817 0, /* tp_as_mapping */
2818 0, /* tp_hash */
2819 0, /* tp_call */
2820 0, /* tp_str */
2821 (getattrofunc)xmlparser_getattro, /* tp_getattro */
2822 0, /* tp_setattro */
2823 0, /* tp_as_buffer */
2824 Py_TPFLAGS_DEFAULT, /* tp_flags */
2825 0, /* tp_doc */
2826 0, /* tp_traverse */
2827 0, /* tp_clear */
2828 0, /* tp_richcompare */
2829 0, /* tp_weaklistoffset */
2830 0, /* tp_iter */
2831 0, /* tp_iternext */
2832 xmlparser_methods, /* tp_methods */
2833 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002834};
2835
2836#endif
2837
2838/* ==================================================================== */
2839/* python module interface */
2840
2841static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002842 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2843 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2844#if defined(USE_EXPAT)
2845 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002846#endif
2847 {NULL, NULL}
2848};
2849
Martin v. Löwis1a214512008-06-11 05:26:20 +00002850
2851static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002852 PyModuleDef_HEAD_INIT,
2853 "_elementtree",
2854 NULL,
2855 -1,
2856 _functions,
2857 NULL,
2858 NULL,
2859 NULL,
2860 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002861};
2862
Neal Norwitzf6657e62006-12-28 04:47:50 +00002863PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002864PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002865{
2866 PyObject* m;
2867 PyObject* g;
2868 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002869
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002870 /* Initialize object types */
2871 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002872 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002873 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002874 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002875#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002876 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002877 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002878#endif
2879
Martin v. Löwis1a214512008-06-11 05:26:20 +00002880 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002881 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002882 return NULL;
2883
2884 /* The code below requires that the module gets already added
2885 to sys.modules. */
2886 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002887 _elementtreemodule.m_name,
2888 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002889
2890 /* python glue code */
2891
2892 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002893 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002894 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002895
2896 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2897
2898 bootstrap = (
2899
Florent Xiclunaf4bdf4e2012-02-11 11:28:16 +01002900 "from copy import deepcopy\n"
2901 "from xml.etree import ElementPath\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002902
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002903 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002904 " if tag == '*':\n"
2905 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002906 " if tag is None or node.tag == tag:\n"
2907 " yield node\n"
2908 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002909 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002910 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002911
2912 "def itertext(node):\n" /* helper */
2913 " if node.text:\n"
2914 " yield node.text\n"
2915 " for e in node:\n"
2916 " for s in e.itertext():\n"
2917 " yield s\n"
2918 " if e.tail:\n"
2919 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002920
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002921 );
2922
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002923 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
2924 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002925
2926 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002927 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002928 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
2929 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002930
2931#if defined(USE_PYEXPAT_CAPI)
2932 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002933 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
2934 if (expat_capi) {
2935 /* check that it's usable */
2936 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
2937 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
2938 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
2939 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
2940 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
2941 expat_capi = NULL;
2942 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002943#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002944
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002945 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01002946 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002947 );
2948 Py_INCREF(elementtree_parseerror_obj);
2949 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
2950
Eli Bendersky092af1f2012-03-04 07:14:03 +02002951 Py_INCREF((PyObject *)&Element_Type);
2952 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
2953
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002954 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002955}