blob: 179cadc5f5916a53d5da8d6c00c7dc9db41546be [file] [log] [blame]
/*
 * ElementTree
 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
 *
 * elementtree accelerator
 *
 * History:
 * 1999-06-20 fl  created (as part of sgmlop)
 * 2001-05-29 fl  effdom edition
 * 2003-02-27 fl  elementtree edition (alpha)
 * 2004-06-03 fl  updates for elementtree 1.2
 * 2005-01-05 fl  major optimization effort
 * 2005-01-11 fl  first public release (cElementTree 0.8)
 * 2005-01-12 fl  split element object into base and extras
 * 2005-01-13 fl  use tagged pointers for tail/text (cElementTree 0.9)
 * 2005-01-17 fl  added treebuilder close method
 * 2005-01-17 fl  fixed crash in getchildren
 * 2005-01-18 fl  removed observer api, added iterparse (cElementTree 0.9.3)
 * 2005-01-23 fl  revised iterparse api; added namespace event support (0.9.8)
 * 2005-01-26 fl  added VERSION module property (cElementTree 1.0)
 * 2005-01-28 fl  added remove method (1.0.1)
 * 2005-03-01 fl  added iselement function; fixed makeelement aliasing (1.0.2)
 * 2005-03-13 fl  export Comment and ProcessingInstruction/PI helpers
 * 2005-03-26 fl  added Comment and PI support to XMLParser
 * 2005-03-27 fl  event optimizations; complain about bogus events
 * 2005-08-08 fl  fixed read error handling in parse
 * 2005-08-11 fl  added runtime test for copy workaround (1.0.3)
 * 2005-12-13 fl  added expat_capi support (for xml.etree) (1.0.4)
 * 2005-12-16 fl  added support for non-standard encodings
 * 2006-03-08 fl  fixed a couple of potential null-refs and leaks
 * 2006-03-12 fl  merge in 2.5 ssize_t changes
 * 2007-08-25 fl  call custom builder's close method from XMLParser
 * 2007-08-31 fl  added iter, extend from ET 1.3
 * 2007-09-01 fl  fixed ParseError exception, setslice source type, etc
 * 2007-09-03 fl  fixed handling of negative insert indexes
 * 2007-09-04 fl  added itertext from ET 1.3
 * 2007-09-06 fl  added position attribute to ParseError exception
 * 2008-06-06 fl  delay error reporting in iterparse (from Hrvoje Niksic)
 *
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 */
46
/* Licensed to PSF under a Contributor Agreement. */
/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Keep this defined to include the expat-based XMLParser type. */
#define USE_EXPAT

/* Define this to make every expat call go through the expat library
   embedded in pyexpat. */
/* #define USE_PYEXPAT_CAPI */

/* Number of children an element can hold without any extra memory
   allocation. */
#define STATIC_CHILDREN 4

/* Tuning: for best performance pick a value such that 80-90% of all
   nodes have no more than this many children.  Setting it to zero
   minimizes the size of the element structure itself, which only helps
   when there are lots of leaf nodes with attributes. */

/* pymalloc always hands out blocks in multiples of eight bytes.  For the
   current C version of ElementTree this means the number of children
   should be an even number, at least on 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
/* Allocation tracing hooks.  With the condition below switched to 1,
   ALLOC and RELEASE maintain a running byte counter and print it together
   with the comment string; as shipped, both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += (size); printf("%8d - %s\n", memory, (comment)); } while (0)
#define RELEASE(size, comment)\
do { memory -= (size); printf("%8d - %s\n", memory, (comment)); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
89
/* compiler tweaks: LOCAL(type) introduces a file-private function that
   returns `type`; under MSVC it additionally asks for inlining and the
   __fastcall calling convention. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
96
/* Helpers for the 'join' flag that is kept in the lowest bit of the text
   and tail pointers.  Because of that flag, every use of text or tail as
   an object pointer has to be wrapped in JOIN_OBJ.  The comments in the
   ElementObject definition explain what the flag is for.

     JOIN_OBJ(p)        p with the flag bit cleared, as a PyObject*
     JOIN_GET(p)        the flag bit of p (0 or 1)
     JOIN_SET(p, flag)  the object address of p with `flag` or-ed in */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
104
105/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000106static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000107static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000108static PyObject* elementtree_iter_obj;
109static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000110static PyObject* elementpath_obj;
111
112/* helpers */
113
114LOCAL(PyObject*)
115deepcopy(PyObject* object, PyObject* memo)
116{
117 /* do a deep copy of the given object */
118
119 PyObject* args;
120 PyObject* result;
121
122 if (!elementtree_deepcopy_obj) {
123 PyErr_SetString(
124 PyExc_RuntimeError,
125 "deepcopy helper not found"
126 );
127 return NULL;
128 }
129
130 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000131 if (!args)
132 return NULL;
133
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000134 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
135 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
136
137 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
138
139 Py_DECREF(args);
140
141 return result;
142}
143
144LOCAL(PyObject*)
145list_join(PyObject* list)
146{
147 /* join list elements (destroying the list in the process) */
148
149 PyObject* joiner;
150 PyObject* function;
151 PyObject* args;
152 PyObject* result;
153
154 switch (PyList_GET_SIZE(list)) {
155 case 0:
156 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000157 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000158 case 1:
159 result = PyList_GET_ITEM(list, 0);
160 Py_INCREF(result);
161 Py_DECREF(list);
162 return result;
163 }
164
165 /* two or more elements: slice out a suitable separator from the
166 first member, and use that to join the entire list */
167
168 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
169 if (!joiner)
170 return NULL;
171
172 function = PyObject_GetAttrString(joiner, "join");
173 if (!function) {
174 Py_DECREF(joiner);
175 return NULL;
176 }
177
178 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000179 if (!args)
180 return NULL;
181
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000182 PyTuple_SET_ITEM(args, 0, list);
183
184 result = PyObject_CallObject(function, args);
185
186 Py_DECREF(args); /* also removes list */
187 Py_DECREF(function);
188 Py_DECREF(joiner);
189
190 return result;
191}
192
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000193/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200194/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000195
196typedef struct {
197
198 /* attributes (a dictionary object), or None if no attributes */
199 PyObject* attrib;
200
201 /* child elements */
202 int length; /* actual number of items */
203 int allocated; /* allocated items */
204
205 /* this either points to _children or to a malloced buffer */
206 PyObject* *children;
207
208 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100209
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210} ElementObjectExtra;
211
212typedef struct {
213 PyObject_HEAD
214
215 /* element tag (a string). */
216 PyObject* tag;
217
218 /* text before first child. note that this is a tagged pointer;
219 use JOIN_OBJ to get the object pointer. the join flag is used
220 to distinguish lists created by the tree builder from lists
221 assigned to the attribute by application code; the former
222 should be joined before being returned to the user, the latter
223 should be left intact. */
224 PyObject* text;
225
226 /* text after this element, in parent. note that this is a tagged
227 pointer; use JOIN_OBJ to get the object pointer. */
228 PyObject* tail;
229
230 ElementObjectExtra* extra;
231
232} ElementObject;
233
Neal Norwitz227b5332006-03-22 09:28:35 +0000234static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235
Christian Heimes90aa7642007-12-19 02:45:37 +0000236#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000237
238/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200239/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000240
241LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200242create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000243{
244 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
245 if (!self->extra)
246 return -1;
247
248 if (!attrib)
249 attrib = Py_None;
250
251 Py_INCREF(attrib);
252 self->extra->attrib = attrib;
253
254 self->extra->length = 0;
255 self->extra->allocated = STATIC_CHILDREN;
256 self->extra->children = self->extra->_children;
257
258 return 0;
259}
260
261LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200262dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000263{
264 int i;
265
266 Py_DECREF(self->extra->attrib);
267
268 for (i = 0; i < self->extra->length; i++)
269 Py_DECREF(self->extra->children[i]);
270
271 if (self->extra->children != self->extra->_children)
272 PyObject_Free(self->extra->children);
273
274 PyObject_Free(self->extra);
275}
276
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277/* Convenience internal function to create new Element objects with the given
278 * tag and attributes.
279*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200281create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282{
283 ElementObject* self;
284
285 self = PyObject_New(ElementObject, &Element_Type);
286 if (self == NULL)
287 return NULL;
288
289 /* use None for empty dictionaries */
290 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
291 attrib = Py_None;
292
293 self->extra = NULL;
294
295 if (attrib != Py_None) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200296 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000297 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000298 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000299 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000300 }
301
302 Py_INCREF(tag);
303 self->tag = tag;
304
305 Py_INCREF(Py_None);
306 self->text = Py_None;
307
308 Py_INCREF(Py_None);
309 self->tail = Py_None;
310
311 ALLOC(sizeof(ElementObject), "create element");
312
313 return (PyObject*) self;
314}
315
Eli Bendersky092af1f2012-03-04 07:14:03 +0200316static PyObject *
317element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
318{
319 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
320 if (e != NULL) {
321 Py_INCREF(Py_None);
322 e->tag = Py_None;
323
324 Py_INCREF(Py_None);
325 e->text = Py_None;
326
327 Py_INCREF(Py_None);
328 e->tail = Py_None;
329
330 e->extra = NULL;
331 }
332 return (PyObject *)e;
333}
334
335static int
336element_init(PyObject *self, PyObject *args, PyObject *kwds)
337{
338 PyObject *tag;
339 PyObject *tmp;
340 PyObject *attrib = NULL;
341 ElementObject *self_elem;
342
343 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
344 return -1;
345
346 if (attrib || kwds) {
347 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
348 if (!attrib)
349 return -1;
350 if (kwds)
351 PyDict_Update(attrib, kwds);
352 } else {
353 Py_INCREF(Py_None);
354 attrib = Py_None;
355 }
356
357 self_elem = (ElementObject *)self;
358
359 /* Use None for empty dictionaries */
360 if (PyDict_CheckExact(attrib) && PyDict_Size(attrib) == 0) {
361 Py_INCREF(Py_None);
362 attrib = Py_None;
363 }
364
365 if (attrib != Py_None) {
366 if (create_extra(self_elem, attrib) < 0) {
367 PyObject_Del(self_elem);
368 return -1;
369 }
370 }
371
372 /* If create_extra needed attrib, it took a reference to it, so we can
373 * release ours anyway.
374 */
375 Py_DECREF(attrib);
376
377 /* Replace the objects already pointed to by tag, text and tail. */
378 tmp = self_elem->tag;
379 self_elem->tag = tag;
380 Py_INCREF(tag);
381 Py_DECREF(tmp);
382
383 tmp = self_elem->text;
384 self_elem->text = Py_None;
385 Py_INCREF(Py_None);
386 Py_DECREF(JOIN_OBJ(tmp));
387
388 tmp = self_elem->tail;
389 self_elem->tail = Py_None;
390 Py_INCREF(Py_None);
391 Py_DECREF(JOIN_OBJ(tmp));
392
393 return 0;
394}
395
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000396LOCAL(int)
397element_resize(ElementObject* self, int extra)
398{
399 int size;
400 PyObject* *children;
401
402 /* make sure self->children can hold the given number of extra
403 elements. set an exception and return -1 if allocation failed */
404
405 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000407
408 size = self->extra->length + extra;
409
410 if (size > self->extra->allocated) {
411 /* use Python 2.4's list growth strategy */
412 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000413 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100414 * which needs at least 4 bytes.
415 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000416 * be safe.
417 */
418 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000420 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100421 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000422 * false alarm always assume at least one child to be safe.
423 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000424 children = PyObject_Realloc(self->extra->children,
425 size * sizeof(PyObject*));
426 if (!children)
427 goto nomemory;
428 } else {
429 children = PyObject_Malloc(size * sizeof(PyObject*));
430 if (!children)
431 goto nomemory;
432 /* copy existing children from static area to malloc buffer */
433 memcpy(children, self->extra->children,
434 self->extra->length * sizeof(PyObject*));
435 }
436 self->extra->children = children;
437 self->extra->allocated = size;
438 }
439
440 return 0;
441
442 nomemory:
443 PyErr_NoMemory();
444 return -1;
445}
446
447LOCAL(int)
448element_add_subelement(ElementObject* self, PyObject* element)
449{
450 /* add a child element to a parent */
451
452 if (element_resize(self, 1) < 0)
453 return -1;
454
455 Py_INCREF(element);
456 self->extra->children[self->extra->length] = element;
457
458 self->extra->length++;
459
460 return 0;
461}
462
463LOCAL(PyObject*)
464element_get_attrib(ElementObject* self)
465{
466 /* return borrowed reference to attrib dictionary */
467 /* note: this function assumes that the extra section exists */
468
469 PyObject* res = self->extra->attrib;
470
471 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000472 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 /* create missing dictionary */
474 res = PyDict_New();
475 if (!res)
476 return NULL;
477 self->extra->attrib = res;
478 }
479
480 return res;
481}
482
483LOCAL(PyObject*)
484element_get_text(ElementObject* self)
485{
486 /* return borrowed reference to text attribute */
487
488 PyObject* res = self->text;
489
490 if (JOIN_GET(res)) {
491 res = JOIN_OBJ(res);
492 if (PyList_CheckExact(res)) {
493 res = list_join(res);
494 if (!res)
495 return NULL;
496 self->text = res;
497 }
498 }
499
500 return res;
501}
502
503LOCAL(PyObject*)
504element_get_tail(ElementObject* self)
505{
506 /* return borrowed reference to text attribute */
507
508 PyObject* res = self->tail;
509
510 if (JOIN_GET(res)) {
511 res = JOIN_OBJ(res);
512 if (PyList_CheckExact(res)) {
513 res = list_join(res);
514 if (!res)
515 return NULL;
516 self->tail = res;
517 }
518 }
519
520 return res;
521}
522
523static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000524subelement(PyObject* self, PyObject* args, PyObject* kw)
525{
526 PyObject* elem;
527
528 ElementObject* parent;
529 PyObject* tag;
530 PyObject* attrib = NULL;
531 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
532 &Element_Type, &parent, &tag,
533 &PyDict_Type, &attrib))
534 return NULL;
535
536 if (attrib || kw) {
537 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
538 if (!attrib)
539 return NULL;
540 if (kw)
541 PyDict_Update(attrib, kw);
542 } else {
543 Py_INCREF(Py_None);
544 attrib = Py_None;
545 }
546
Eli Bendersky092af1f2012-03-04 07:14:03 +0200547 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000548
549 Py_DECREF(attrib);
550
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000551 if (element_add_subelement(parent, elem) < 0) {
552 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000553 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000554 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000555
556 return elem;
557}
558
559static void
560element_dealloc(ElementObject* self)
561{
562 if (self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200563 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000564
565 /* discard attributes */
566 Py_DECREF(self->tag);
567 Py_DECREF(JOIN_OBJ(self->text));
568 Py_DECREF(JOIN_OBJ(self->tail));
569
570 RELEASE(sizeof(ElementObject), "destroy element");
571
Eli Bendersky092af1f2012-03-04 07:14:03 +0200572 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573}
574
575/* -------------------------------------------------------------------- */
576/* methods (in alphabetical order) */
577
578static PyObject*
579element_append(ElementObject* self, PyObject* args)
580{
581 PyObject* element;
582 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
583 return NULL;
584
585 if (element_add_subelement(self, element) < 0)
586 return NULL;
587
588 Py_RETURN_NONE;
589}
590
591static PyObject*
592element_clear(ElementObject* self, PyObject* args)
593{
594 if (!PyArg_ParseTuple(args, ":clear"))
595 return NULL;
596
597 if (self->extra) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200598 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599 self->extra = NULL;
600 }
601
602 Py_INCREF(Py_None);
603 Py_DECREF(JOIN_OBJ(self->text));
604 self->text = Py_None;
605
606 Py_INCREF(Py_None);
607 Py_DECREF(JOIN_OBJ(self->tail));
608 self->tail = Py_None;
609
610 Py_RETURN_NONE;
611}
612
613static PyObject*
614element_copy(ElementObject* self, PyObject* args)
615{
616 int i;
617 ElementObject* element;
618
619 if (!PyArg_ParseTuple(args, ":__copy__"))
620 return NULL;
621
Eli Bendersky092af1f2012-03-04 07:14:03 +0200622 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000623 self->tag, (self->extra) ? self->extra->attrib : Py_None
624 );
625 if (!element)
626 return NULL;
627
628 Py_DECREF(JOIN_OBJ(element->text));
629 element->text = self->text;
630 Py_INCREF(JOIN_OBJ(element->text));
631
632 Py_DECREF(JOIN_OBJ(element->tail));
633 element->tail = self->tail;
634 Py_INCREF(JOIN_OBJ(element->tail));
635
636 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100637
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000638 if (element_resize(element, self->extra->length) < 0) {
639 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000640 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000641 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000642
643 for (i = 0; i < self->extra->length; i++) {
644 Py_INCREF(self->extra->children[i]);
645 element->extra->children[i] = self->extra->children[i];
646 }
647
648 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100649
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000650 }
651
652 return (PyObject*) element;
653}
654
655static PyObject*
656element_deepcopy(ElementObject* self, PyObject* args)
657{
658 int i;
659 ElementObject* element;
660 PyObject* tag;
661 PyObject* attrib;
662 PyObject* text;
663 PyObject* tail;
664 PyObject* id;
665
666 PyObject* memo;
667 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
668 return NULL;
669
670 tag = deepcopy(self->tag, memo);
671 if (!tag)
672 return NULL;
673
674 if (self->extra) {
675 attrib = deepcopy(self->extra->attrib, memo);
676 if (!attrib) {
677 Py_DECREF(tag);
678 return NULL;
679 }
680 } else {
681 Py_INCREF(Py_None);
682 attrib = Py_None;
683 }
684
Eli Bendersky092af1f2012-03-04 07:14:03 +0200685 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000686
687 Py_DECREF(tag);
688 Py_DECREF(attrib);
689
690 if (!element)
691 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100692
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693 text = deepcopy(JOIN_OBJ(self->text), memo);
694 if (!text)
695 goto error;
696 Py_DECREF(element->text);
697 element->text = JOIN_SET(text, JOIN_GET(self->text));
698
699 tail = deepcopy(JOIN_OBJ(self->tail), memo);
700 if (!tail)
701 goto error;
702 Py_DECREF(element->tail);
703 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
704
705 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100706
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000707 if (element_resize(element, self->extra->length) < 0)
708 goto error;
709
710 for (i = 0; i < self->extra->length; i++) {
711 PyObject* child = deepcopy(self->extra->children[i], memo);
712 if (!child) {
713 element->extra->length = i;
714 goto error;
715 }
716 element->extra->children[i] = child;
717 }
718
719 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100720
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000721 }
722
723 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000724 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000725 if (!id)
726 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000727
728 i = PyDict_SetItem(memo, id, (PyObject*) element);
729
730 Py_DECREF(id);
731
732 if (i < 0)
733 goto error;
734
735 return (PyObject*) element;
736
737 error:
738 Py_DECREF(element);
739 return NULL;
740}
741
742LOCAL(int)
743checkpath(PyObject* tag)
744{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000745 Py_ssize_t i;
746 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000747
748 /* check if a tag contains an xpath character */
749
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000750#define PATHCHAR(ch) \
751 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200754 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
755 void *data = PyUnicode_DATA(tag);
756 unsigned int kind = PyUnicode_KIND(tag);
757 for (i = 0; i < len; i++) {
758 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
759 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000760 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200761 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200763 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 return 1;
765 }
766 return 0;
767 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000768 if (PyBytes_Check(tag)) {
769 char *p = PyBytes_AS_STRING(tag);
770 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 if (p[i] == '{')
772 check = 0;
773 else if (p[i] == '}')
774 check = 1;
775 else if (check && PATHCHAR(p[i]))
776 return 1;
777 }
778 return 0;
779 }
780
781 return 1; /* unknown type; might be path expression */
782}
783
784static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000785element_extend(ElementObject* self, PyObject* args)
786{
787 PyObject* seq;
788 Py_ssize_t i, seqlen = 0;
789
790 PyObject* seq_in;
791 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
792 return NULL;
793
794 seq = PySequence_Fast(seq_in, "");
795 if (!seq) {
796 PyErr_Format(
797 PyExc_TypeError,
798 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
799 );
800 return NULL;
801 }
802
803 seqlen = PySequence_Size(seq);
804 for (i = 0; i < seqlen; i++) {
805 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
806 if (element_add_subelement(self, element) < 0) {
807 Py_DECREF(seq);
808 return NULL;
809 }
810 }
811
812 Py_DECREF(seq);
813
814 Py_RETURN_NONE;
815}
816
817static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000818element_find(ElementObject* self, PyObject* args)
819{
820 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000821 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000822 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200823
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000824 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000825 return NULL;
826
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200827 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200828 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200829 return _PyObject_CallMethodId(
830 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000831 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200832 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000833
834 if (!self->extra)
835 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100836
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000837 for (i = 0; i < self->extra->length; i++) {
838 PyObject* item = self->extra->children[i];
839 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000840 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000841 Py_INCREF(item);
842 return item;
843 }
844 }
845
846 Py_RETURN_NONE;
847}
848
849static PyObject*
850element_findtext(ElementObject* self, PyObject* args)
851{
852 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000853 PyObject* tag;
854 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000855 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200856 _Py_IDENTIFIER(findtext);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200857
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000858 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000859 return NULL;
860
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000861 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200862 return _PyObject_CallMethodId(
863 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000864 );
865
866 if (!self->extra) {
867 Py_INCREF(default_value);
868 return default_value;
869 }
870
871 for (i = 0; i < self->extra->length; i++) {
872 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000873 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
874
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000875 PyObject* text = element_get_text(item);
876 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000877 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000878 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000879 return text;
880 }
881 }
882
883 Py_INCREF(default_value);
884 return default_value;
885}
886
887static PyObject*
888element_findall(ElementObject* self, PyObject* args)
889{
890 int i;
891 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000892 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000893 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200894
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000895 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000896 return NULL;
897
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200898 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200899 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200900 return _PyObject_CallMethodId(
901 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000902 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200903 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000904
905 out = PyList_New(0);
906 if (!out)
907 return NULL;
908
909 if (!self->extra)
910 return out;
911
912 for (i = 0; i < self->extra->length; i++) {
913 PyObject* item = self->extra->children[i];
914 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000915 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000916 if (PyList_Append(out, item) < 0) {
917 Py_DECREF(out);
918 return NULL;
919 }
920 }
921 }
922
923 return out;
924}
925
926static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000927element_iterfind(ElementObject* self, PyObject* args)
928{
929 PyObject* tag;
930 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200931 _Py_IDENTIFIER(iterfind);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200932
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000933 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
934 return NULL;
935
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200936 return _PyObject_CallMethodId(
937 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000938 );
939}
940
941static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000942element_get(ElementObject* self, PyObject* args)
943{
944 PyObject* value;
945
946 PyObject* key;
947 PyObject* default_value = Py_None;
948 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
949 return NULL;
950
951 if (!self->extra || self->extra->attrib == Py_None)
952 value = default_value;
953 else {
954 value = PyDict_GetItem(self->extra->attrib, key);
955 if (!value)
956 value = default_value;
957 }
958
959 Py_INCREF(value);
960 return value;
961}
962
963static PyObject*
964element_getchildren(ElementObject* self, PyObject* args)
965{
966 int i;
967 PyObject* list;
968
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000969 /* FIXME: report as deprecated? */
970
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000971 if (!PyArg_ParseTuple(args, ":getchildren"))
972 return NULL;
973
974 if (!self->extra)
975 return PyList_New(0);
976
977 list = PyList_New(self->extra->length);
978 if (!list)
979 return NULL;
980
981 for (i = 0; i < self->extra->length; i++) {
982 PyObject* item = self->extra->children[i];
983 Py_INCREF(item);
984 PyList_SET_ITEM(list, i, item);
985 }
986
987 return list;
988}
989
990static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000991element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000992{
993 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100994
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000995 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000996 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000997 return NULL;
998
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000999 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001000 PyErr_SetString(
1001 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001002 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001003 );
1004 return NULL;
1005 }
1006
1007 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001008 if (!args)
1009 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001010
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001011 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1012 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1013
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001014 result = PyObject_CallObject(elementtree_iter_obj, args);
1015
1016 Py_DECREF(args);
1017
1018 return result;
1019}
1020
1021
1022static PyObject*
1023element_itertext(ElementObject* self, PyObject* args)
1024{
1025 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001026
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001027 if (!PyArg_ParseTuple(args, ":itertext"))
1028 return NULL;
1029
1030 if (!elementtree_itertext_obj) {
1031 PyErr_SetString(
1032 PyExc_RuntimeError,
1033 "itertext helper not found"
1034 );
1035 return NULL;
1036 }
1037
1038 args = PyTuple_New(1);
1039 if (!args)
1040 return NULL;
1041
1042 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1043
1044 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001045
1046 Py_DECREF(args);
1047
1048 return result;
1049}
1050
1051static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001052element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001053{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001054 ElementObject* self = (ElementObject*) self_;
1055
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001056 if (!self->extra || index < 0 || index >= self->extra->length) {
1057 PyErr_SetString(
1058 PyExc_IndexError,
1059 "child index out of range"
1060 );
1061 return NULL;
1062 }
1063
1064 Py_INCREF(self->extra->children[index]);
1065 return self->extra->children[index];
1066}
1067
1068static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001069element_insert(ElementObject* self, PyObject* args)
1070{
1071 int i;
1072
1073 int index;
1074 PyObject* element;
1075 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1076 &Element_Type, &element))
1077 return NULL;
1078
1079 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001080 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001082 if (index < 0) {
1083 index += self->extra->length;
1084 if (index < 0)
1085 index = 0;
1086 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001087 if (index > self->extra->length)
1088 index = self->extra->length;
1089
1090 if (element_resize(self, 1) < 0)
1091 return NULL;
1092
1093 for (i = self->extra->length; i > index; i--)
1094 self->extra->children[i] = self->extra->children[i-1];
1095
1096 Py_INCREF(element);
1097 self->extra->children[index] = element;
1098
1099 self->extra->length++;
1100
1101 Py_RETURN_NONE;
1102}
1103
1104static PyObject*
1105element_items(ElementObject* self, PyObject* args)
1106{
1107 if (!PyArg_ParseTuple(args, ":items"))
1108 return NULL;
1109
1110 if (!self->extra || self->extra->attrib == Py_None)
1111 return PyList_New(0);
1112
1113 return PyDict_Items(self->extra->attrib);
1114}
1115
1116static PyObject*
1117element_keys(ElementObject* self, PyObject* args)
1118{
1119 if (!PyArg_ParseTuple(args, ":keys"))
1120 return NULL;
1121
1122 if (!self->extra || self->extra->attrib == Py_None)
1123 return PyList_New(0);
1124
1125 return PyDict_Keys(self->extra->attrib);
1126}
1127
Martin v. Löwis18e16552006-02-15 17:27:45 +00001128static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001129element_length(ElementObject* self)
1130{
1131 if (!self->extra)
1132 return 0;
1133
1134 return self->extra->length;
1135}
1136
1137static PyObject*
1138element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1139{
1140 PyObject* elem;
1141
1142 PyObject* tag;
1143 PyObject* attrib;
1144 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1145 return NULL;
1146
1147 attrib = PyDict_Copy(attrib);
1148 if (!attrib)
1149 return NULL;
1150
Eli Bendersky092af1f2012-03-04 07:14:03 +02001151 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001152
1153 Py_DECREF(attrib);
1154
1155 return elem;
1156}
1157
1158static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001159element_remove(ElementObject* self, PyObject* args)
1160{
1161 int i;
1162
1163 PyObject* element;
1164 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1165 return NULL;
1166
1167 if (!self->extra) {
1168 /* element has no children, so raise exception */
1169 PyErr_SetString(
1170 PyExc_ValueError,
1171 "list.remove(x): x not in list"
1172 );
1173 return NULL;
1174 }
1175
1176 for (i = 0; i < self->extra->length; i++) {
1177 if (self->extra->children[i] == element)
1178 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001179 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180 break;
1181 }
1182
1183 if (i == self->extra->length) {
1184 /* element is not in children, so raise exception */
1185 PyErr_SetString(
1186 PyExc_ValueError,
1187 "list.remove(x): x not in list"
1188 );
1189 return NULL;
1190 }
1191
1192 Py_DECREF(self->extra->children[i]);
1193
1194 self->extra->length--;
1195
1196 for (; i < self->extra->length; i++)
1197 self->extra->children[i] = self->extra->children[i+1];
1198
1199 Py_RETURN_NONE;
1200}
1201
1202static PyObject*
1203element_repr(ElementObject* self)
1204{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001205 if (self->tag)
1206 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1207 else
1208 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001209}
1210
1211static PyObject*
1212element_set(ElementObject* self, PyObject* args)
1213{
1214 PyObject* attrib;
1215
1216 PyObject* key;
1217 PyObject* value;
1218 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1219 return NULL;
1220
1221 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001222 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001223
1224 attrib = element_get_attrib(self);
1225 if (!attrib)
1226 return NULL;
1227
1228 if (PyDict_SetItem(attrib, key, value) < 0)
1229 return NULL;
1230
1231 Py_RETURN_NONE;
1232}
1233
1234static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001235element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001236{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001237 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001238 int i;
1239 PyObject* old;
1240
1241 if (!self->extra || index < 0 || index >= self->extra->length) {
1242 PyErr_SetString(
1243 PyExc_IndexError,
1244 "child assignment index out of range");
1245 return -1;
1246 }
1247
1248 old = self->extra->children[index];
1249
1250 if (item) {
1251 Py_INCREF(item);
1252 self->extra->children[index] = item;
1253 } else {
1254 self->extra->length--;
1255 for (i = index; i < self->extra->length; i++)
1256 self->extra->children[i] = self->extra->children[i+1];
1257 }
1258
1259 Py_DECREF(old);
1260
1261 return 0;
1262}
1263
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001264static PyObject*
1265element_subscr(PyObject* self_, PyObject* item)
1266{
1267 ElementObject* self = (ElementObject*) self_;
1268
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001269 if (PyIndex_Check(item)) {
1270 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001271
1272 if (i == -1 && PyErr_Occurred()) {
1273 return NULL;
1274 }
1275 if (i < 0 && self->extra)
1276 i += self->extra->length;
1277 return element_getitem(self_, i);
1278 }
1279 else if (PySlice_Check(item)) {
1280 Py_ssize_t start, stop, step, slicelen, cur, i;
1281 PyObject* list;
1282
1283 if (!self->extra)
1284 return PyList_New(0);
1285
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001286 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001287 self->extra->length,
1288 &start, &stop, &step, &slicelen) < 0) {
1289 return NULL;
1290 }
1291
1292 if (slicelen <= 0)
1293 return PyList_New(0);
1294 else {
1295 list = PyList_New(slicelen);
1296 if (!list)
1297 return NULL;
1298
1299 for (cur = start, i = 0; i < slicelen;
1300 cur += step, i++) {
1301 PyObject* item = self->extra->children[cur];
1302 Py_INCREF(item);
1303 PyList_SET_ITEM(list, i, item);
1304 }
1305
1306 return list;
1307 }
1308 }
1309 else {
1310 PyErr_SetString(PyExc_TypeError,
1311 "element indices must be integers");
1312 return NULL;
1313 }
1314}
1315
1316static int
1317element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1318{
1319 ElementObject* self = (ElementObject*) self_;
1320
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001321 if (PyIndex_Check(item)) {
1322 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001323
1324 if (i == -1 && PyErr_Occurred()) {
1325 return -1;
1326 }
1327 if (i < 0 && self->extra)
1328 i += self->extra->length;
1329 return element_setitem(self_, i, value);
1330 }
1331 else if (PySlice_Check(item)) {
1332 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1333
1334 PyObject* recycle = NULL;
1335 PyObject* seq = NULL;
1336
1337 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001338 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001339
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001340 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001341 self->extra->length,
1342 &start, &stop, &step, &slicelen) < 0) {
1343 return -1;
1344 }
1345
1346 if (value == NULL)
1347 newlen = 0;
1348 else {
1349 seq = PySequence_Fast(value, "");
1350 if (!seq) {
1351 PyErr_Format(
1352 PyExc_TypeError,
1353 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1354 );
1355 return -1;
1356 }
1357 newlen = PySequence_Size(seq);
1358 }
1359
1360 if (step != 1 && newlen != slicelen)
1361 {
1362 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001363 "attempt to assign sequence of size %zd "
1364 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001365 newlen, slicelen
1366 );
1367 return -1;
1368 }
1369
1370
1371 /* Resize before creating the recycle bin, to prevent refleaks. */
1372 if (newlen > slicelen) {
1373 if (element_resize(self, newlen - slicelen) < 0) {
1374 if (seq) {
1375 Py_DECREF(seq);
1376 }
1377 return -1;
1378 }
1379 }
1380
1381 if (slicelen > 0) {
1382 /* to avoid recursive calls to this method (via decref), move
1383 old items to the recycle bin here, and get rid of them when
1384 we're done modifying the element */
1385 recycle = PyList_New(slicelen);
1386 if (!recycle) {
1387 if (seq) {
1388 Py_DECREF(seq);
1389 }
1390 return -1;
1391 }
1392 for (cur = start, i = 0; i < slicelen;
1393 cur += step, i++)
1394 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1395 }
1396
1397 if (newlen < slicelen) {
1398 /* delete slice */
1399 for (i = stop; i < self->extra->length; i++)
1400 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1401 } else if (newlen > slicelen) {
1402 /* insert slice */
1403 for (i = self->extra->length-1; i >= stop; i--)
1404 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1405 }
1406
1407 /* replace the slice */
1408 for (cur = start, i = 0; i < newlen;
1409 cur += step, i++) {
1410 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1411 Py_INCREF(element);
1412 self->extra->children[cur] = element;
1413 }
1414
1415 self->extra->length += newlen - slicelen;
1416
1417 if (seq) {
1418 Py_DECREF(seq);
1419 }
1420
1421 /* discard the recycle bin, and everything in it */
1422 Py_XDECREF(recycle);
1423
1424 return 0;
1425 }
1426 else {
1427 PyErr_SetString(PyExc_TypeError,
1428 "element indices must be integers");
1429 return -1;
1430 }
1431}
1432
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001433static PyMethodDef element_methods[] = {
1434
1435 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1436
1437 {"get", (PyCFunction) element_get, METH_VARARGS},
1438 {"set", (PyCFunction) element_set, METH_VARARGS},
1439
1440 {"find", (PyCFunction) element_find, METH_VARARGS},
1441 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1442 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1443
1444 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001445 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001446 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1447 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1448
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001449 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1450 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1451 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1452
1453 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001454 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1455
1456 {"items", (PyCFunction) element_items, METH_VARARGS},
1457 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1458
1459 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1460
1461 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1462 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1463
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001464 {NULL, NULL}
1465};
1466
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001467static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001468element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001469{
1470 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001471 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001472
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001473 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001474 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001475
Alexander Belopolskye239d232010-12-08 23:31:48 +00001476 if (name == NULL)
1477 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001478
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001479 /* handle common attributes first */
1480 if (strcmp(name, "tag") == 0) {
1481 res = self->tag;
1482 Py_INCREF(res);
1483 return res;
1484 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001485 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001486 Py_INCREF(res);
1487 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001488 }
1489
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001490 /* methods */
1491 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1492 if (res)
1493 return res;
1494
1495 /* less common attributes */
1496 if (strcmp(name, "tail") == 0) {
1497 PyErr_Clear();
1498 res = element_get_tail(self);
1499 } else if (strcmp(name, "attrib") == 0) {
1500 PyErr_Clear();
1501 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001502 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001503 res = element_get_attrib(self);
1504 }
1505
1506 if (!res)
1507 return NULL;
1508
1509 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001510 return res;
1511}
1512
1513static int
1514element_setattr(ElementObject* self, const char* name, PyObject* value)
1515{
1516 if (value == NULL) {
1517 PyErr_SetString(
1518 PyExc_AttributeError,
1519 "can't delete element attributes"
1520 );
1521 return -1;
1522 }
1523
1524 if (strcmp(name, "tag") == 0) {
1525 Py_DECREF(self->tag);
1526 self->tag = value;
1527 Py_INCREF(self->tag);
1528 } else if (strcmp(name, "text") == 0) {
1529 Py_DECREF(JOIN_OBJ(self->text));
1530 self->text = value;
1531 Py_INCREF(self->text);
1532 } else if (strcmp(name, "tail") == 0) {
1533 Py_DECREF(JOIN_OBJ(self->tail));
1534 self->tail = value;
1535 Py_INCREF(self->tail);
1536 } else if (strcmp(name, "attrib") == 0) {
1537 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001538 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001539 Py_DECREF(self->extra->attrib);
1540 self->extra->attrib = value;
1541 Py_INCREF(self->extra->attrib);
1542 } else {
1543 PyErr_SetString(PyExc_AttributeError, name);
1544 return -1;
1545 }
1546
1547 return 0;
1548}
1549
1550static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001551 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001552 0, /* sq_concat */
1553 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001554 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001555 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001556 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001557 0,
1558};
1559
1560static PyMappingMethods element_as_mapping = {
1561 (lenfunc) element_length,
1562 (binaryfunc) element_subscr,
1563 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001564};
1565
Neal Norwitz227b5332006-03-22 09:28:35 +00001566static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001567 PyVarObject_HEAD_INIT(NULL, 0)
1568 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001569 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001570 (destructor)element_dealloc, /* tp_dealloc */
1571 0, /* tp_print */
1572 0, /* tp_getattr */
1573 (setattrfunc)element_setattr, /* tp_setattr */
1574 0, /* tp_reserved */
1575 (reprfunc)element_repr, /* tp_repr */
1576 0, /* tp_as_number */
1577 &element_as_sequence, /* tp_as_sequence */
1578 &element_as_mapping, /* tp_as_mapping */
1579 0, /* tp_hash */
1580 0, /* tp_call */
1581 0, /* tp_str */
1582 (getattrofunc)element_getattro, /* tp_getattro */
1583 0, /* tp_setattro */
1584 0, /* tp_as_buffer */
1585 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1586 0, /* tp_doc */
1587 0, /* tp_traverse */
1588 0, /* tp_clear */
1589 0, /* tp_richcompare */
1590 0, /* tp_weaklistoffset */
1591 0, /* tp_iter */
1592 0, /* tp_iternext */
1593 element_methods, /* tp_methods */
1594 0, /* tp_members */
1595 0, /* tp_getset */
1596 0, /* tp_base */
1597 0, /* tp_dict */
1598 0, /* tp_descr_get */
1599 0, /* tp_descr_set */
1600 0, /* tp_dictoffset */
1601 (initproc)element_init, /* tp_init */
1602 PyType_GenericAlloc, /* tp_alloc */
1603 element_new, /* tp_new */
1604 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001605};
1606
1607/* ==================================================================== */
1608/* the tree builder type */
1609
1610typedef struct {
1611 PyObject_HEAD
1612
1613 PyObject* root; /* root node (first created node) */
1614
1615 ElementObject* this; /* current node */
1616 ElementObject* last; /* most recently created node */
1617
1618 PyObject* data; /* data collector (string or list), or NULL */
1619
1620 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001621 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001622
1623 /* element tracing */
1624 PyObject* events; /* list of events, or NULL if not collecting */
1625 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1626 PyObject* end_event_obj;
1627 PyObject* start_ns_event_obj;
1628 PyObject* end_ns_event_obj;
1629
1630} TreeBuilderObject;
1631
Neal Norwitz227b5332006-03-22 09:28:35 +00001632static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001633
Christian Heimes90aa7642007-12-19 02:45:37 +00001634#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001635
1636/* -------------------------------------------------------------------- */
1637/* constructor and destructor */
1638
1639LOCAL(PyObject*)
1640treebuilder_new(void)
1641{
1642 TreeBuilderObject* self;
1643
1644 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1645 if (self == NULL)
1646 return NULL;
1647
1648 self->root = NULL;
1649
1650 Py_INCREF(Py_None);
1651 self->this = (ElementObject*) Py_None;
1652
1653 Py_INCREF(Py_None);
1654 self->last = (ElementObject*) Py_None;
1655
1656 self->data = NULL;
1657
1658 self->stack = PyList_New(20);
1659 self->index = 0;
1660
1661 self->events = NULL;
1662 self->start_event_obj = self->end_event_obj = NULL;
1663 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1664
1665 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1666
1667 return (PyObject*) self;
1668}
1669
1670static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001671treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001672{
1673 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1674 return NULL;
1675
1676 return treebuilder_new();
1677}
1678
1679static void
1680treebuilder_dealloc(TreeBuilderObject* self)
1681{
1682 Py_XDECREF(self->end_ns_event_obj);
1683 Py_XDECREF(self->start_ns_event_obj);
1684 Py_XDECREF(self->end_event_obj);
1685 Py_XDECREF(self->start_event_obj);
1686 Py_XDECREF(self->events);
1687 Py_DECREF(self->stack);
1688 Py_XDECREF(self->data);
1689 Py_DECREF(self->last);
1690 Py_DECREF(self->this);
1691 Py_XDECREF(self->root);
1692
1693 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1694
1695 PyObject_Del(self);
1696}
1697
1698/* -------------------------------------------------------------------- */
1699/* handlers */
1700
1701LOCAL(PyObject*)
1702treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1703 PyObject* standalone)
1704{
1705 Py_RETURN_NONE;
1706}
1707
1708LOCAL(PyObject*)
1709treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1710 PyObject* attrib)
1711{
1712 PyObject* node;
1713 PyObject* this;
1714
1715 if (self->data) {
1716 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001717 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001718 self->last->text = JOIN_SET(
1719 self->data, PyList_CheckExact(self->data)
1720 );
1721 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001722 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001723 self->last->tail = JOIN_SET(
1724 self->data, PyList_CheckExact(self->data)
1725 );
1726 }
1727 self->data = NULL;
1728 }
1729
Eli Bendersky092af1f2012-03-04 07:14:03 +02001730 node = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001731 if (!node)
1732 return NULL;
1733
1734 this = (PyObject*) self->this;
1735
1736 if (this != Py_None) {
1737 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001738 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001739 } else {
1740 if (self->root) {
1741 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001742 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001743 "multiple elements on top level"
1744 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001745 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001746 }
1747 Py_INCREF(node);
1748 self->root = node;
1749 }
1750
1751 if (self->index < PyList_GET_SIZE(self->stack)) {
1752 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001753 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001754 Py_INCREF(this);
1755 } else {
1756 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001757 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001758 }
1759 self->index++;
1760
1761 Py_DECREF(this);
1762 Py_INCREF(node);
1763 self->this = (ElementObject*) node;
1764
1765 Py_DECREF(self->last);
1766 Py_INCREF(node);
1767 self->last = (ElementObject*) node;
1768
1769 if (self->start_event_obj) {
1770 PyObject* res;
1771 PyObject* action = self->start_event_obj;
1772 res = PyTuple_New(2);
1773 if (res) {
1774 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1775 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1776 PyList_Append(self->events, res);
1777 Py_DECREF(res);
1778 } else
1779 PyErr_Clear(); /* FIXME: propagate error */
1780 }
1781
1782 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001783
1784 error:
1785 Py_DECREF(node);
1786 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001787}
1788
1789LOCAL(PyObject*)
1790treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1791{
1792 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001793 if (self->last == (ElementObject*) Py_None) {
1794 /* ignore calls to data before the first call to start */
1795 Py_RETURN_NONE;
1796 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001797 /* store the first item as is */
1798 Py_INCREF(data); self->data = data;
1799 } else {
1800 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001801 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1802 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001803 /* expat often generates single character data sections; handle
1804 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001805 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1806 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001807 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001808 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001809 } else if (PyList_CheckExact(self->data)) {
1810 if (PyList_Append(self->data, data) < 0)
1811 return NULL;
1812 } else {
1813 PyObject* list = PyList_New(2);
1814 if (!list)
1815 return NULL;
1816 PyList_SET_ITEM(list, 0, self->data);
1817 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1818 self->data = list;
1819 }
1820 }
1821
1822 Py_RETURN_NONE;
1823}
1824
1825LOCAL(PyObject*)
1826treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1827{
1828 PyObject* item;
1829
1830 if (self->data) {
1831 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001832 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001833 self->last->text = JOIN_SET(
1834 self->data, PyList_CheckExact(self->data)
1835 );
1836 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001837 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001838 self->last->tail = JOIN_SET(
1839 self->data, PyList_CheckExact(self->data)
1840 );
1841 }
1842 self->data = NULL;
1843 }
1844
1845 if (self->index == 0) {
1846 PyErr_SetString(
1847 PyExc_IndexError,
1848 "pop from empty stack"
1849 );
1850 return NULL;
1851 }
1852
1853 self->index--;
1854
1855 item = PyList_GET_ITEM(self->stack, self->index);
1856 Py_INCREF(item);
1857
1858 Py_DECREF(self->last);
1859
1860 self->last = (ElementObject*) self->this;
1861 self->this = (ElementObject*) item;
1862
1863 if (self->end_event_obj) {
1864 PyObject* res;
1865 PyObject* action = self->end_event_obj;
1866 PyObject* node = (PyObject*) self->last;
1867 res = PyTuple_New(2);
1868 if (res) {
1869 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1870 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1871 PyList_Append(self->events, res);
1872 Py_DECREF(res);
1873 } else
1874 PyErr_Clear(); /* FIXME: propagate error */
1875 }
1876
1877 Py_INCREF(self->last);
1878 return (PyObject*) self->last;
1879}
1880
1881LOCAL(void)
1882treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001883 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001884{
1885 PyObject* res;
1886 PyObject* action;
1887 PyObject* parcel;
1888
1889 if (!self->events)
1890 return;
1891
1892 if (start) {
1893 if (!self->start_ns_event_obj)
1894 return;
1895 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001896 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001897 if (!parcel)
1898 return;
1899 Py_INCREF(action);
1900 } else {
1901 if (!self->end_ns_event_obj)
1902 return;
1903 action = self->end_ns_event_obj;
1904 Py_INCREF(action);
1905 parcel = Py_None;
1906 Py_INCREF(parcel);
1907 }
1908
1909 res = PyTuple_New(2);
1910
1911 if (res) {
1912 PyTuple_SET_ITEM(res, 0, action);
1913 PyTuple_SET_ITEM(res, 1, parcel);
1914 PyList_Append(self->events, res);
1915 Py_DECREF(res);
1916 } else
1917 PyErr_Clear(); /* FIXME: propagate error */
1918}
1919
1920/* -------------------------------------------------------------------- */
1921/* methods (in alphabetical order) */
1922
1923static PyObject*
1924treebuilder_data(TreeBuilderObject* self, PyObject* args)
1925{
1926 PyObject* data;
1927 if (!PyArg_ParseTuple(args, "O:data", &data))
1928 return NULL;
1929
1930 return treebuilder_handle_data(self, data);
1931}
1932
1933static PyObject*
1934treebuilder_end(TreeBuilderObject* self, PyObject* args)
1935{
1936 PyObject* tag;
1937 if (!PyArg_ParseTuple(args, "O:end", &tag))
1938 return NULL;
1939
1940 return treebuilder_handle_end(self, tag);
1941}
1942
1943LOCAL(PyObject*)
1944treebuilder_done(TreeBuilderObject* self)
1945{
1946 PyObject* res;
1947
1948 /* FIXME: check stack size? */
1949
1950 if (self->root)
1951 res = self->root;
1952 else
1953 res = Py_None;
1954
1955 Py_INCREF(res);
1956 return res;
1957}
1958
1959static PyObject*
1960treebuilder_close(TreeBuilderObject* self, PyObject* args)
1961{
1962 if (!PyArg_ParseTuple(args, ":close"))
1963 return NULL;
1964
1965 return treebuilder_done(self);
1966}
1967
1968static PyObject*
1969treebuilder_start(TreeBuilderObject* self, PyObject* args)
1970{
1971 PyObject* tag;
1972 PyObject* attrib = Py_None;
1973 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1974 return NULL;
1975
1976 return treebuilder_handle_start(self, tag, attrib);
1977}
1978
1979static PyObject*
1980treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1981{
1982 PyObject* encoding;
1983 PyObject* standalone;
1984 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1985 return NULL;
1986
1987 return treebuilder_handle_xml(self, encoding, standalone);
1988}
1989
1990static PyMethodDef treebuilder_methods[] = {
1991 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1992 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1993 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1994 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1995 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1996 {NULL, NULL}
1997};
1998
Neal Norwitz227b5332006-03-22 09:28:35 +00001999static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002000 PyVarObject_HEAD_INIT(NULL, 0)
2001 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002002 /* methods */
2003 (destructor)treebuilder_dealloc, /* tp_dealloc */
2004 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002005 0, /* tp_getattr */
2006 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002007 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002008 0, /* tp_repr */
2009 0, /* tp_as_number */
2010 0, /* tp_as_sequence */
2011 0, /* tp_as_mapping */
2012 0, /* tp_hash */
2013 0, /* tp_call */
2014 0, /* tp_str */
2015 0, /* tp_getattro */
2016 0, /* tp_setattro */
2017 0, /* tp_as_buffer */
2018 Py_TPFLAGS_DEFAULT, /* tp_flags */
2019 0, /* tp_doc */
2020 0, /* tp_traverse */
2021 0, /* tp_clear */
2022 0, /* tp_richcompare */
2023 0, /* tp_weaklistoffset */
2024 0, /* tp_iter */
2025 0, /* tp_iternext */
2026 treebuilder_methods, /* tp_methods */
2027 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002028};
2029
2030/* ==================================================================== */
2031/* the expat interface */
2032
2033#if defined(USE_EXPAT)
2034
2035#include "expat.h"
2036
/* EXPAT(func) names an expat entry point.  Without USE_PYEXPAT_CAPI it
   pastes the XML_ prefix onto the name for a direct call; with it, the
   call goes through the dispatch table obtained from pyexpat. */
#if !defined(USE_PYEXPAT_CAPI)
#define EXPAT(func) (XML_##func)
#else
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#endif
2044
2045typedef struct {
2046 PyObject_HEAD
2047
2048 XML_Parser parser;
2049
2050 PyObject* target;
2051 PyObject* entity;
2052
2053 PyObject* names;
2054
2055 PyObject* handle_xml;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002056
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002057 PyObject* handle_start;
2058 PyObject* handle_data;
2059 PyObject* handle_end;
2060
2061 PyObject* handle_comment;
2062 PyObject* handle_pi;
2063
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002064 PyObject* handle_close;
2065
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002066} XMLParserObject;
2067
Neal Norwitz227b5332006-03-22 09:28:35 +00002068static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002069
2070/* helpers */
2071
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002072LOCAL(PyObject*)
2073makeuniversal(XMLParserObject* self, const char* string)
2074{
2075 /* convert a UTF-8 tag/attribute name from the expat parser
2076 to a universal name string */
2077
2078 int size = strlen(string);
2079 PyObject* key;
2080 PyObject* value;
2081
2082 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002083 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002084 if (!key)
2085 return NULL;
2086
2087 value = PyDict_GetItem(self->names, key);
2088
2089 if (value) {
2090 Py_INCREF(value);
2091 } else {
2092 /* new name. convert to universal name, and decode as
2093 necessary */
2094
2095 PyObject* tag;
2096 char* p;
2097 int i;
2098
2099 /* look for namespace separator */
2100 for (i = 0; i < size; i++)
2101 if (string[i] == '}')
2102 break;
2103 if (i != size) {
2104 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002105 tag = PyBytes_FromStringAndSize(NULL, size+1);
2106 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002107 p[0] = '{';
2108 memcpy(p+1, string, size);
2109 size++;
2110 } else {
2111 /* plain name; use key as tag */
2112 Py_INCREF(key);
2113 tag = key;
2114 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002115
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002116 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002117 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002118 value = PyUnicode_DecodeUTF8(p, size, "strict");
2119 Py_DECREF(tag);
2120 if (!value) {
2121 Py_DECREF(key);
2122 return NULL;
2123 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002124
2125 /* add to names dictionary */
2126 if (PyDict_SetItem(self->names, key, value) < 0) {
2127 Py_DECREF(key);
2128 Py_DECREF(value);
2129 return NULL;
2130 }
2131 }
2132
2133 Py_DECREF(key);
2134 return value;
2135}
2136
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002137static void
2138expat_set_error(const char* message, int line, int column)
2139{
Victor Stinner499dfcf2011-03-21 13:26:24 +01002140 PyObject *errmsg, *error, *position;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002141
Victor Stinner499dfcf2011-03-21 13:26:24 +01002142 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
2143 message, line, column);
2144 if (errmsg == NULL)
2145 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002146
Victor Stinner499dfcf2011-03-21 13:26:24 +01002147 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2148 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002149 if (!error)
2150 return;
2151
2152 /* add position attribute */
2153 position = Py_BuildValue("(ii)", line, column);
2154 if (!position) {
2155 Py_DECREF(error);
2156 return;
2157 }
2158 if (PyObject_SetAttrString(error, "position", position) == -1) {
2159 Py_DECREF(error);
2160 Py_DECREF(position);
2161 return;
2162 }
2163 Py_DECREF(position);
2164
2165 PyErr_SetObject(elementtree_parseerror_obj, error);
2166 Py_DECREF(error);
2167}
2168
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002169/* -------------------------------------------------------------------- */
2170/* handlers */
2171
2172static void
2173expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2174 int data_len)
2175{
2176 PyObject* key;
2177 PyObject* value;
2178 PyObject* res;
2179
2180 if (data_len < 2 || data_in[0] != '&')
2181 return;
2182
Neal Norwitz0269b912007-08-08 06:56:02 +00002183 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002184 if (!key)
2185 return;
2186
2187 value = PyDict_GetItem(self->entity, key);
2188
2189 if (value) {
2190 if (TreeBuilder_CheckExact(self->target))
2191 res = treebuilder_handle_data(
2192 (TreeBuilderObject*) self->target, value
2193 );
2194 else if (self->handle_data)
2195 res = PyObject_CallFunction(self->handle_data, "O", value);
2196 else
2197 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002198 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002199 } else if (!PyErr_Occurred()) {
2200 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002201 char message[128] = "undefined entity ";
2202 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002203 expat_set_error(
2204 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002205 EXPAT(GetErrorLineNumber)(self->parser),
2206 EXPAT(GetErrorColumnNumber)(self->parser)
2207 );
2208 }
2209
2210 Py_DECREF(key);
2211}
2212
2213static void
2214expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2215 const XML_Char **attrib_in)
2216{
2217 PyObject* res;
2218 PyObject* tag;
2219 PyObject* attrib;
2220 int ok;
2221
2222 /* tag name */
2223 tag = makeuniversal(self, tag_in);
2224 if (!tag)
2225 return; /* parser will look for errors */
2226
2227 /* attributes */
2228 if (attrib_in[0]) {
2229 attrib = PyDict_New();
2230 if (!attrib)
2231 return;
2232 while (attrib_in[0] && attrib_in[1]) {
2233 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002234 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002235 if (!key || !value) {
2236 Py_XDECREF(value);
2237 Py_XDECREF(key);
2238 Py_DECREF(attrib);
2239 return;
2240 }
2241 ok = PyDict_SetItem(attrib, key, value);
2242 Py_DECREF(value);
2243 Py_DECREF(key);
2244 if (ok < 0) {
2245 Py_DECREF(attrib);
2246 return;
2247 }
2248 attrib_in += 2;
2249 }
2250 } else {
2251 Py_INCREF(Py_None);
2252 attrib = Py_None;
2253 }
2254
2255 if (TreeBuilder_CheckExact(self->target))
2256 /* shortcut */
2257 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2258 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002259 else if (self->handle_start) {
2260 if (attrib == Py_None) {
2261 Py_DECREF(attrib);
2262 attrib = PyDict_New();
2263 if (!attrib)
2264 return;
2265 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002266 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002267 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002268 res = NULL;
2269
2270 Py_DECREF(tag);
2271 Py_DECREF(attrib);
2272
2273 Py_XDECREF(res);
2274}
2275
2276static void
2277expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2278 int data_len)
2279{
2280 PyObject* data;
2281 PyObject* res;
2282
Neal Norwitz0269b912007-08-08 06:56:02 +00002283 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002284 if (!data)
2285 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002286
2287 if (TreeBuilder_CheckExact(self->target))
2288 /* shortcut */
2289 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2290 else if (self->handle_data)
2291 res = PyObject_CallFunction(self->handle_data, "O", data);
2292 else
2293 res = NULL;
2294
2295 Py_DECREF(data);
2296
2297 Py_XDECREF(res);
2298}
2299
2300static void
2301expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2302{
2303 PyObject* tag;
2304 PyObject* res = NULL;
2305
2306 if (TreeBuilder_CheckExact(self->target))
2307 /* shortcut */
2308 /* the standard tree builder doesn't look at the end tag */
2309 res = treebuilder_handle_end(
2310 (TreeBuilderObject*) self->target, Py_None
2311 );
2312 else if (self->handle_end) {
2313 tag = makeuniversal(self, tag_in);
2314 if (tag) {
2315 res = PyObject_CallFunction(self->handle_end, "O", tag);
2316 Py_DECREF(tag);
2317 }
2318 }
2319
2320 Py_XDECREF(res);
2321}
2322
2323static void
2324expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2325 const XML_Char *uri)
2326{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002327 PyObject* sprefix = NULL;
2328 PyObject* suri = NULL;
2329
2330 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2331 if (!suri)
2332 return;
2333
2334 if (prefix)
2335 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2336 else
2337 sprefix = PyUnicode_FromString("");
2338 if (!sprefix) {
2339 Py_DECREF(suri);
2340 return;
2341 }
2342
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002343 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002344 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002345 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002346
2347 Py_DECREF(sprefix);
2348 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002349}
2350
2351static void
2352expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2353{
2354 treebuilder_handle_namespace(
2355 (TreeBuilderObject*) self->target, 0, NULL, NULL
2356 );
2357}
2358
2359static void
2360expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2361{
2362 PyObject* comment;
2363 PyObject* res;
2364
2365 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002366 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002367 if (comment) {
2368 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2369 Py_XDECREF(res);
2370 Py_DECREF(comment);
2371 }
2372 }
2373}
2374
2375static void
2376expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2377 const XML_Char* data_in)
2378{
2379 PyObject* target;
2380 PyObject* data;
2381 PyObject* res;
2382
2383 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002384 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2385 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002386 if (target && data) {
2387 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2388 Py_XDECREF(res);
2389 Py_DECREF(data);
2390 Py_DECREF(target);
2391 } else {
2392 Py_XDECREF(data);
2393 Py_XDECREF(target);
2394 }
2395 }
2396}
2397
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002398static int
2399expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2400 XML_Encoding *info)
2401{
2402 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002403 unsigned char s[256];
2404 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002405 void *data;
2406 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002407
2408 memset(info, 0, sizeof(XML_Encoding));
2409
2410 for (i = 0; i < 256; i++)
2411 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002412
Fredrik Lundhc3389992005-12-25 11:40:19 +00002413 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002414 if (!u)
2415 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002416 if (PyUnicode_READY(u))
2417 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002418
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002419 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002420 Py_DECREF(u);
2421 return XML_STATUS_ERROR;
2422 }
2423
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002424 kind = PyUnicode_KIND(u);
2425 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002426 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002427 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2428 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2429 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002430 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002431 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002432 }
2433
2434 Py_DECREF(u);
2435
2436 return XML_STATUS_OK;
2437}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002438
2439/* -------------------------------------------------------------------- */
2440/* constructor and destructor */
2441
2442static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002443xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002444{
2445 XMLParserObject* self;
2446 /* FIXME: does this need to be static? */
2447 static XML_Memory_Handling_Suite memory_handler;
2448
2449 PyObject* target = NULL;
2450 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002451 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002452 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2453 &target, &encoding))
2454 return NULL;
2455
2456#if defined(USE_PYEXPAT_CAPI)
2457 if (!expat_capi) {
2458 PyErr_SetString(
2459 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2460 );
2461 return NULL;
2462 }
2463#endif
2464
2465 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2466 if (self == NULL)
2467 return NULL;
2468
2469 self->entity = PyDict_New();
2470 if (!self->entity) {
2471 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002472 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002473 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002474
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002475 self->names = PyDict_New();
2476 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002477 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002478 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002479 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002480 }
2481
2482 memory_handler.malloc_fcn = PyObject_Malloc;
2483 memory_handler.realloc_fcn = PyObject_Realloc;
2484 memory_handler.free_fcn = PyObject_Free;
2485
2486 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2487 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002488 PyObject_Del(self->names);
2489 PyObject_Del(self->entity);
2490 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002491 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002492 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002493 }
2494
2495 /* setup target handlers */
2496 if (!target) {
2497 target = treebuilder_new();
2498 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002499 EXPAT(ParserFree)(self->parser);
2500 PyObject_Del(self->names);
2501 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002502 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002503 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002504 }
2505 } else
2506 Py_INCREF(target);
2507 self->target = target;
2508
2509 self->handle_xml = PyObject_GetAttrString(target, "xml");
2510 self->handle_start = PyObject_GetAttrString(target, "start");
2511 self->handle_data = PyObject_GetAttrString(target, "data");
2512 self->handle_end = PyObject_GetAttrString(target, "end");
2513 self->handle_comment = PyObject_GetAttrString(target, "comment");
2514 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002515 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002516
2517 PyErr_Clear();
2518
2519 /* configure parser */
2520 EXPAT(SetUserData)(self->parser, self);
2521 EXPAT(SetElementHandler)(
2522 self->parser,
2523 (XML_StartElementHandler) expat_start_handler,
2524 (XML_EndElementHandler) expat_end_handler
2525 );
2526 EXPAT(SetDefaultHandlerExpand)(
2527 self->parser,
2528 (XML_DefaultHandler) expat_default_handler
2529 );
2530 EXPAT(SetCharacterDataHandler)(
2531 self->parser,
2532 (XML_CharacterDataHandler) expat_data_handler
2533 );
2534 if (self->handle_comment)
2535 EXPAT(SetCommentHandler)(
2536 self->parser,
2537 (XML_CommentHandler) expat_comment_handler
2538 );
2539 if (self->handle_pi)
2540 EXPAT(SetProcessingInstructionHandler)(
2541 self->parser,
2542 (XML_ProcessingInstructionHandler) expat_pi_handler
2543 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544 EXPAT(SetUnknownEncodingHandler)(
2545 self->parser,
2546 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2547 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002548
2549 ALLOC(sizeof(XMLParserObject), "create expatparser");
2550
2551 return (PyObject*) self;
2552}
2553
2554static void
2555xmlparser_dealloc(XMLParserObject* self)
2556{
2557 EXPAT(ParserFree)(self->parser);
2558
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002559 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002560 Py_XDECREF(self->handle_pi);
2561 Py_XDECREF(self->handle_comment);
2562 Py_XDECREF(self->handle_end);
2563 Py_XDECREF(self->handle_data);
2564 Py_XDECREF(self->handle_start);
2565 Py_XDECREF(self->handle_xml);
2566
2567 Py_DECREF(self->target);
2568 Py_DECREF(self->entity);
2569 Py_DECREF(self->names);
2570
2571 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2572
2573 PyObject_Del(self);
2574}
2575
2576/* -------------------------------------------------------------------- */
2577/* methods (in alphabetical order) */
2578
2579LOCAL(PyObject*)
2580expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2581{
2582 int ok;
2583
2584 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2585
2586 if (PyErr_Occurred())
2587 return NULL;
2588
2589 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002590 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002591 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2592 EXPAT(GetErrorLineNumber)(self->parser),
2593 EXPAT(GetErrorColumnNumber)(self->parser)
2594 );
2595 return NULL;
2596 }
2597
2598 Py_RETURN_NONE;
2599}
2600
2601static PyObject*
2602xmlparser_close(XMLParserObject* self, PyObject* args)
2603{
2604 /* end feeding data to parser */
2605
2606 PyObject* res;
2607 if (!PyArg_ParseTuple(args, ":close"))
2608 return NULL;
2609
2610 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002611 if (!res)
2612 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002613
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002614 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002615 Py_DECREF(res);
2616 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002617 } if (self->handle_close) {
2618 Py_DECREF(res);
2619 return PyObject_CallFunction(self->handle_close, "");
2620 } else
2621 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002622}
2623
2624static PyObject*
2625xmlparser_feed(XMLParserObject* self, PyObject* args)
2626{
2627 /* feed data to parser */
2628
2629 char* data;
2630 int data_len;
2631 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2632 return NULL;
2633
2634 return expat_parse(self, data, data_len, 0);
2635}
2636
2637static PyObject*
2638xmlparser_parse(XMLParserObject* self, PyObject* args)
2639{
2640 /* (internal) parse until end of input stream */
2641
2642 PyObject* reader;
2643 PyObject* buffer;
2644 PyObject* res;
2645
2646 PyObject* fileobj;
2647 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2648 return NULL;
2649
2650 reader = PyObject_GetAttrString(fileobj, "read");
2651 if (!reader)
2652 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002653
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002654 /* read from open file object */
2655 for (;;) {
2656
2657 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2658
2659 if (!buffer) {
2660 /* read failed (e.g. due to KeyboardInterrupt) */
2661 Py_DECREF(reader);
2662 return NULL;
2663 }
2664
Christian Heimes72b710a2008-05-26 13:28:38 +00002665 if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002666 Py_DECREF(buffer);
2667 break;
2668 }
2669
2670 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002671 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002672 );
2673
2674 Py_DECREF(buffer);
2675
2676 if (!res) {
2677 Py_DECREF(reader);
2678 return NULL;
2679 }
2680 Py_DECREF(res);
2681
2682 }
2683
2684 Py_DECREF(reader);
2685
2686 res = expat_parse(self, "", 0, 1);
2687
2688 if (res && TreeBuilder_CheckExact(self->target)) {
2689 Py_DECREF(res);
2690 return treebuilder_done((TreeBuilderObject*) self->target);
2691 }
2692
2693 return res;
2694}
2695
2696static PyObject*
2697xmlparser_setevents(XMLParserObject* self, PyObject* args)
2698{
2699 /* activate element event reporting */
2700
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002701 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002702 TreeBuilderObject* target;
2703
2704 PyObject* events; /* event collector */
2705 PyObject* event_set = Py_None;
2706 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2707 &event_set))
2708 return NULL;
2709
2710 if (!TreeBuilder_CheckExact(self->target)) {
2711 PyErr_SetString(
2712 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01002713 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002714 "targets"
2715 );
2716 return NULL;
2717 }
2718
2719 target = (TreeBuilderObject*) self->target;
2720
2721 Py_INCREF(events);
2722 Py_XDECREF(target->events);
2723 target->events = events;
2724
2725 /* clear out existing events */
2726 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2727 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2728 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2729 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2730
2731 if (event_set == Py_None) {
2732 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002733 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734 Py_RETURN_NONE;
2735 }
2736
2737 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2738 goto error;
2739
2740 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2741 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2742 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002743 if (PyUnicode_Check(item)) {
2744 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002745 if (event == NULL)
2746 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002747 } else if (PyBytes_Check(item))
2748 event = PyBytes_AS_STRING(item);
2749 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002750 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002751 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002752 if (strcmp(event, "start") == 0) {
2753 Py_INCREF(item);
2754 target->start_event_obj = item;
2755 } else if (strcmp(event, "end") == 0) {
2756 Py_INCREF(item);
2757 Py_XDECREF(target->end_event_obj);
2758 target->end_event_obj = item;
2759 } else if (strcmp(event, "start-ns") == 0) {
2760 Py_INCREF(item);
2761 Py_XDECREF(target->start_ns_event_obj);
2762 target->start_ns_event_obj = item;
2763 EXPAT(SetNamespaceDeclHandler)(
2764 self->parser,
2765 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2766 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2767 );
2768 } else if (strcmp(event, "end-ns") == 0) {
2769 Py_INCREF(item);
2770 Py_XDECREF(target->end_ns_event_obj);
2771 target->end_ns_event_obj = item;
2772 EXPAT(SetNamespaceDeclHandler)(
2773 self->parser,
2774 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2775 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2776 );
2777 } else {
2778 PyErr_Format(
2779 PyExc_ValueError,
2780 "unknown event '%s'", event
2781 );
2782 return NULL;
2783 }
2784 }
2785
2786 Py_RETURN_NONE;
2787
2788 error:
2789 PyErr_SetString(
2790 PyExc_TypeError,
2791 "invalid event tuple"
2792 );
2793 return NULL;
2794}
2795
2796static PyMethodDef xmlparser_methods[] = {
2797 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2798 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2799 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2800 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2801 {NULL, NULL}
2802};
2803
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002804static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002805xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002806{
Alexander Belopolskye239d232010-12-08 23:31:48 +00002807 if (PyUnicode_Check(nameobj)) {
2808 PyObject* res;
2809 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
2810 res = self->entity;
2811 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
2812 res = self->target;
2813 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
2814 return PyUnicode_FromFormat(
2815 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002816 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00002817 }
2818 else
2819 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002820
Alexander Belopolskye239d232010-12-08 23:31:48 +00002821 Py_INCREF(res);
2822 return res;
2823 }
2824 generic:
2825 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002826}
2827
Neal Norwitz227b5332006-03-22 09:28:35 +00002828static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002829 PyVarObject_HEAD_INIT(NULL, 0)
2830 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002831 /* methods */
2832 (destructor)xmlparser_dealloc, /* tp_dealloc */
2833 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002834 0, /* tp_getattr */
2835 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002836 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002837 0, /* tp_repr */
2838 0, /* tp_as_number */
2839 0, /* tp_as_sequence */
2840 0, /* tp_as_mapping */
2841 0, /* tp_hash */
2842 0, /* tp_call */
2843 0, /* tp_str */
2844 (getattrofunc)xmlparser_getattro, /* tp_getattro */
2845 0, /* tp_setattro */
2846 0, /* tp_as_buffer */
2847 Py_TPFLAGS_DEFAULT, /* tp_flags */
2848 0, /* tp_doc */
2849 0, /* tp_traverse */
2850 0, /* tp_clear */
2851 0, /* tp_richcompare */
2852 0, /* tp_weaklistoffset */
2853 0, /* tp_iter */
2854 0, /* tp_iternext */
2855 xmlparser_methods, /* tp_methods */
2856 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002857};
2858
2859#endif
2860
2861/* ==================================================================== */
2862/* python module interface */
2863
2864static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002865 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2866 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2867#if defined(USE_EXPAT)
2868 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002869#endif
2870 {NULL, NULL}
2871};
2872
Martin v. Löwis1a214512008-06-11 05:26:20 +00002873
2874static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002875 PyModuleDef_HEAD_INIT,
2876 "_elementtree",
2877 NULL,
2878 -1,
2879 _functions,
2880 NULL,
2881 NULL,
2882 NULL,
2883 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002884};
2885
Neal Norwitzf6657e62006-12-28 04:47:50 +00002886PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002887PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002888{
2889 PyObject* m;
2890 PyObject* g;
2891 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002892
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002893 /* Initialize object types */
2894 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002895 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002896 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002897 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002898#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002899 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002900 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002901#endif
2902
Martin v. Löwis1a214512008-06-11 05:26:20 +00002903 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002904 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002905 return NULL;
2906
2907 /* The code below requires that the module gets already added
2908 to sys.modules. */
2909 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002910 _elementtreemodule.m_name,
2911 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002912
2913 /* python glue code */
2914
2915 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002916 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002917 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002918
2919 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2920
2921 bootstrap = (
2922
Florent Xiclunaf4bdf4e2012-02-11 11:28:16 +01002923 "from copy import deepcopy\n"
2924 "from xml.etree import ElementPath\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002925
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002926 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002927 " if tag == '*':\n"
2928 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002929 " if tag is None or node.tag == tag:\n"
2930 " yield node\n"
2931 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002932 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002933 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002934
2935 "def itertext(node):\n" /* helper */
2936 " if node.text:\n"
2937 " yield node.text\n"
2938 " for e in node:\n"
2939 " for s in e.itertext():\n"
2940 " yield s\n"
2941 " if e.tail:\n"
2942 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002943
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002944 );
2945
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002946 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
2947 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002948
2949 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002950 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002951 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
2952 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002953
2954#if defined(USE_PYEXPAT_CAPI)
2955 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002956 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
2957 if (expat_capi) {
2958 /* check that it's usable */
2959 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
2960 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
2961 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
2962 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
2963 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
2964 expat_capi = NULL;
2965 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002966#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002967
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002968 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01002969 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002970 );
2971 Py_INCREF(elementtree_parseerror_obj);
2972 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
2973
Eli Bendersky092af1f2012-03-04 07:14:03 +02002974 Py_INCREF((PyObject *)&Element_Type);
2975 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
2976
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002977 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002978}