blob: ba37cd7b7a718fc4eb75dcc7b585b989564c20ff [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000052#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000053
54/* -------------------------------------------------------------------- */
55/* configuration */
56
57/* Leave defined to include the expat-based XMLParser type */
58#define USE_EXPAT
59
Florent Xiclunaf15351d2010-03-13 23:24:31 +000060/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* #define USE_PYEXPAT_CAPI */
62
63/* An element can hold this many children without extra memory
64 allocations. */
65#define STATIC_CHILDREN 4
66
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010073 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000074 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
/* Allocation tracing hooks.  Flip the condition below to 1 to keep a
   running byte count and print it on every ALLOC/RELEASE; in the normal
   build both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) declares a file-local helper returning
   `type`; under MSVC it additionally requests inlining and the fastcall
   convention */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
96
/* The text and tail slots are tagged pointers: the lowest bit of the
   stored value is a 'join' flag and the remaining bits are the object
   pointer.  Any use of text or tail as an object pointer must therefore
   go through JOIN_OBJ.  See the comments in the ElementObject definition
   for more info.

   JOIN_OBJ(p)       - the object pointer with the flag bit cleared
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - the pointer with the flag bit or'ed in */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t) 1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
104
105/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000106static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000107static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000108static PyObject* elementtree_iter_obj;
109static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000110static PyObject* elementpath_obj;
111
112/* helpers */
113
114LOCAL(PyObject*)
115deepcopy(PyObject* object, PyObject* memo)
116{
117 /* do a deep copy of the given object */
118
119 PyObject* args;
120 PyObject* result;
121
122 if (!elementtree_deepcopy_obj) {
123 PyErr_SetString(
124 PyExc_RuntimeError,
125 "deepcopy helper not found"
126 );
127 return NULL;
128 }
129
130 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000131 if (!args)
132 return NULL;
133
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000134 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
135 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
136
137 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
138
139 Py_DECREF(args);
140
141 return result;
142}
143
144LOCAL(PyObject*)
145list_join(PyObject* list)
146{
147 /* join list elements (destroying the list in the process) */
148
149 PyObject* joiner;
150 PyObject* function;
151 PyObject* args;
152 PyObject* result;
153
154 switch (PyList_GET_SIZE(list)) {
155 case 0:
156 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000157 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000158 case 1:
159 result = PyList_GET_ITEM(list, 0);
160 Py_INCREF(result);
161 Py_DECREF(list);
162 return result;
163 }
164
165 /* two or more elements: slice out a suitable separator from the
166 first member, and use that to join the entire list */
167
168 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
169 if (!joiner)
170 return NULL;
171
172 function = PyObject_GetAttrString(joiner, "join");
173 if (!function) {
174 Py_DECREF(joiner);
175 return NULL;
176 }
177
178 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000179 if (!args)
180 return NULL;
181
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000182 PyTuple_SET_ITEM(args, 0, list);
183
184 result = PyObject_CallObject(function, args);
185
186 Py_DECREF(args); /* also removes list */
187 Py_DECREF(function);
188 Py_DECREF(joiner);
189
190 return result;
191}
192
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000193/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200194/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000195
196typedef struct {
197
198 /* attributes (a dictionary object), or None if no attributes */
199 PyObject* attrib;
200
201 /* child elements */
202 int length; /* actual number of items */
203 int allocated; /* allocated items */
204
205 /* this either points to _children or to a malloced buffer */
206 PyObject* *children;
207
208 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100209
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210} ElementObjectExtra;
211
212typedef struct {
213 PyObject_HEAD
214
215 /* element tag (a string). */
216 PyObject* tag;
217
218 /* text before first child. note that this is a tagged pointer;
219 use JOIN_OBJ to get the object pointer. the join flag is used
220 to distinguish lists created by the tree builder from lists
221 assigned to the attribute by application code; the former
222 should be joined before being returned to the user, the latter
223 should be left intact. */
224 PyObject* text;
225
226 /* text after this element, in parent. note that this is a tagged
227 pointer; use JOIN_OBJ to get the object pointer. */
228 PyObject* tail;
229
230 ElementObjectExtra* extra;
231
232} ElementObject;
233
Neal Norwitz227b5332006-03-22 09:28:35 +0000234static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235
Christian Heimes90aa7642007-12-19 02:45:37 +0000236#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000237
238/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200239/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000240
241LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200242create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000243{
244 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
245 if (!self->extra)
246 return -1;
247
248 if (!attrib)
249 attrib = Py_None;
250
251 Py_INCREF(attrib);
252 self->extra->attrib = attrib;
253
254 self->extra->length = 0;
255 self->extra->allocated = STATIC_CHILDREN;
256 self->extra->children = self->extra->_children;
257
258 return 0;
259}
260
261LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200262dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000263{
264 int i;
265
266 Py_DECREF(self->extra->attrib);
267
268 for (i = 0; i < self->extra->length; i++)
269 Py_DECREF(self->extra->children[i]);
270
271 if (self->extra->children != self->extra->_children)
272 PyObject_Free(self->extra->children);
273
274 PyObject_Free(self->extra);
275}
276
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277/* Convenience internal function to create new Element objects with the given
278 * tag and attributes.
279*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200281create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282{
283 ElementObject* self;
284
285 self = PyObject_New(ElementObject, &Element_Type);
286 if (self == NULL)
287 return NULL;
288
289 /* use None for empty dictionaries */
290 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
291 attrib = Py_None;
292
293 self->extra = NULL;
294
295 if (attrib != Py_None) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200296 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000297 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000298 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000299 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000300 }
301
302 Py_INCREF(tag);
303 self->tag = tag;
304
305 Py_INCREF(Py_None);
306 self->text = Py_None;
307
308 Py_INCREF(Py_None);
309 self->tail = Py_None;
310
311 ALLOC(sizeof(ElementObject), "create element");
312
313 return (PyObject*) self;
314}
315
Eli Bendersky092af1f2012-03-04 07:14:03 +0200316static PyObject *
317element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
318{
319 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
320 if (e != NULL) {
321 Py_INCREF(Py_None);
322 e->tag = Py_None;
323
324 Py_INCREF(Py_None);
325 e->text = Py_None;
326
327 Py_INCREF(Py_None);
328 e->tail = Py_None;
329
330 e->extra = NULL;
331 }
332 return (PyObject *)e;
333}
334
335static int
336element_init(PyObject *self, PyObject *args, PyObject *kwds)
337{
338 PyObject *tag;
339 PyObject *tmp;
340 PyObject *attrib = NULL;
341 ElementObject *self_elem;
342
343 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
344 return -1;
345
346 if (attrib || kwds) {
347 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
348 if (!attrib)
349 return -1;
350 if (kwds)
351 PyDict_Update(attrib, kwds);
352 } else {
353 Py_INCREF(Py_None);
354 attrib = Py_None;
355 }
356
357 self_elem = (ElementObject *)self;
358
359 /* Use None for empty dictionaries */
360 if (PyDict_CheckExact(attrib) && PyDict_Size(attrib) == 0) {
361 Py_INCREF(Py_None);
362 attrib = Py_None;
363 }
364
365 if (attrib != Py_None) {
366 if (create_extra(self_elem, attrib) < 0) {
367 PyObject_Del(self_elem);
368 return -1;
369 }
370 }
371
372 /* If create_extra needed attrib, it took a reference to it, so we can
373 * release ours anyway.
374 */
375 Py_DECREF(attrib);
376
377 /* Replace the objects already pointed to by tag, text and tail. */
378 tmp = self_elem->tag;
379 self_elem->tag = tag;
380 Py_INCREF(tag);
381 Py_DECREF(tmp);
382
383 tmp = self_elem->text;
384 self_elem->text = Py_None;
385 Py_INCREF(Py_None);
386 Py_DECREF(JOIN_OBJ(tmp));
387
388 tmp = self_elem->tail;
389 self_elem->tail = Py_None;
390 Py_INCREF(Py_None);
391 Py_DECREF(JOIN_OBJ(tmp));
392
393 return 0;
394}
395
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000396LOCAL(int)
397element_resize(ElementObject* self, int extra)
398{
399 int size;
400 PyObject* *children;
401
402 /* make sure self->children can hold the given number of extra
403 elements. set an exception and return -1 if allocation failed */
404
405 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000407
408 size = self->extra->length + extra;
409
410 if (size > self->extra->allocated) {
411 /* use Python 2.4's list growth strategy */
412 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000413 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100414 * which needs at least 4 bytes.
415 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000416 * be safe.
417 */
418 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000420 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100421 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000422 * false alarm always assume at least one child to be safe.
423 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000424 children = PyObject_Realloc(self->extra->children,
425 size * sizeof(PyObject*));
426 if (!children)
427 goto nomemory;
428 } else {
429 children = PyObject_Malloc(size * sizeof(PyObject*));
430 if (!children)
431 goto nomemory;
432 /* copy existing children from static area to malloc buffer */
433 memcpy(children, self->extra->children,
434 self->extra->length * sizeof(PyObject*));
435 }
436 self->extra->children = children;
437 self->extra->allocated = size;
438 }
439
440 return 0;
441
442 nomemory:
443 PyErr_NoMemory();
444 return -1;
445}
446
447LOCAL(int)
448element_add_subelement(ElementObject* self, PyObject* element)
449{
450 /* add a child element to a parent */
451
452 if (element_resize(self, 1) < 0)
453 return -1;
454
455 Py_INCREF(element);
456 self->extra->children[self->extra->length] = element;
457
458 self->extra->length++;
459
460 return 0;
461}
462
463LOCAL(PyObject*)
464element_get_attrib(ElementObject* self)
465{
466 /* return borrowed reference to attrib dictionary */
467 /* note: this function assumes that the extra section exists */
468
469 PyObject* res = self->extra->attrib;
470
471 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000472 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 /* create missing dictionary */
474 res = PyDict_New();
475 if (!res)
476 return NULL;
477 self->extra->attrib = res;
478 }
479
480 return res;
481}
482
483LOCAL(PyObject*)
484element_get_text(ElementObject* self)
485{
486 /* return borrowed reference to text attribute */
487
488 PyObject* res = self->text;
489
490 if (JOIN_GET(res)) {
491 res = JOIN_OBJ(res);
492 if (PyList_CheckExact(res)) {
493 res = list_join(res);
494 if (!res)
495 return NULL;
496 self->text = res;
497 }
498 }
499
500 return res;
501}
502
503LOCAL(PyObject*)
504element_get_tail(ElementObject* self)
505{
506 /* return borrowed reference to text attribute */
507
508 PyObject* res = self->tail;
509
510 if (JOIN_GET(res)) {
511 res = JOIN_OBJ(res);
512 if (PyList_CheckExact(res)) {
513 res = list_join(res);
514 if (!res)
515 return NULL;
516 self->tail = res;
517 }
518 }
519
520 return res;
521}
522
523static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000524subelement(PyObject* self, PyObject* args, PyObject* kw)
525{
526 PyObject* elem;
527
528 ElementObject* parent;
529 PyObject* tag;
530 PyObject* attrib = NULL;
531 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
532 &Element_Type, &parent, &tag,
533 &PyDict_Type, &attrib))
534 return NULL;
535
536 if (attrib || kw) {
537 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
538 if (!attrib)
539 return NULL;
540 if (kw)
541 PyDict_Update(attrib, kw);
542 } else {
543 Py_INCREF(Py_None);
544 attrib = Py_None;
545 }
546
Eli Bendersky092af1f2012-03-04 07:14:03 +0200547 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000548
549 Py_DECREF(attrib);
550
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000551 if (element_add_subelement(parent, elem) < 0) {
552 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000553 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000554 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000555
556 return elem;
557}
558
559static void
560element_dealloc(ElementObject* self)
561{
562 if (self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200563 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000564
565 /* discard attributes */
566 Py_DECREF(self->tag);
567 Py_DECREF(JOIN_OBJ(self->text));
568 Py_DECREF(JOIN_OBJ(self->tail));
569
570 RELEASE(sizeof(ElementObject), "destroy element");
571
Eli Bendersky092af1f2012-03-04 07:14:03 +0200572 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573}
574
575/* -------------------------------------------------------------------- */
576/* methods (in alphabetical order) */
577
578static PyObject*
579element_append(ElementObject* self, PyObject* args)
580{
581 PyObject* element;
582 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
583 return NULL;
584
585 if (element_add_subelement(self, element) < 0)
586 return NULL;
587
588 Py_RETURN_NONE;
589}
590
591static PyObject*
592element_clear(ElementObject* self, PyObject* args)
593{
594 if (!PyArg_ParseTuple(args, ":clear"))
595 return NULL;
596
597 if (self->extra) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200598 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599 self->extra = NULL;
600 }
601
602 Py_INCREF(Py_None);
603 Py_DECREF(JOIN_OBJ(self->text));
604 self->text = Py_None;
605
606 Py_INCREF(Py_None);
607 Py_DECREF(JOIN_OBJ(self->tail));
608 self->tail = Py_None;
609
610 Py_RETURN_NONE;
611}
612
613static PyObject*
614element_copy(ElementObject* self, PyObject* args)
615{
616 int i;
617 ElementObject* element;
618
619 if (!PyArg_ParseTuple(args, ":__copy__"))
620 return NULL;
621
Eli Bendersky092af1f2012-03-04 07:14:03 +0200622 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000623 self->tag, (self->extra) ? self->extra->attrib : Py_None
624 );
625 if (!element)
626 return NULL;
627
628 Py_DECREF(JOIN_OBJ(element->text));
629 element->text = self->text;
630 Py_INCREF(JOIN_OBJ(element->text));
631
632 Py_DECREF(JOIN_OBJ(element->tail));
633 element->tail = self->tail;
634 Py_INCREF(JOIN_OBJ(element->tail));
635
636 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100637
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000638 if (element_resize(element, self->extra->length) < 0) {
639 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000640 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000641 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000642
643 for (i = 0; i < self->extra->length; i++) {
644 Py_INCREF(self->extra->children[i]);
645 element->extra->children[i] = self->extra->children[i];
646 }
647
648 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100649
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000650 }
651
652 return (PyObject*) element;
653}
654
655static PyObject*
656element_deepcopy(ElementObject* self, PyObject* args)
657{
658 int i;
659 ElementObject* element;
660 PyObject* tag;
661 PyObject* attrib;
662 PyObject* text;
663 PyObject* tail;
664 PyObject* id;
665
666 PyObject* memo;
667 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
668 return NULL;
669
670 tag = deepcopy(self->tag, memo);
671 if (!tag)
672 return NULL;
673
674 if (self->extra) {
675 attrib = deepcopy(self->extra->attrib, memo);
676 if (!attrib) {
677 Py_DECREF(tag);
678 return NULL;
679 }
680 } else {
681 Py_INCREF(Py_None);
682 attrib = Py_None;
683 }
684
Eli Bendersky092af1f2012-03-04 07:14:03 +0200685 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000686
687 Py_DECREF(tag);
688 Py_DECREF(attrib);
689
690 if (!element)
691 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100692
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693 text = deepcopy(JOIN_OBJ(self->text), memo);
694 if (!text)
695 goto error;
696 Py_DECREF(element->text);
697 element->text = JOIN_SET(text, JOIN_GET(self->text));
698
699 tail = deepcopy(JOIN_OBJ(self->tail), memo);
700 if (!tail)
701 goto error;
702 Py_DECREF(element->tail);
703 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
704
705 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100706
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000707 if (element_resize(element, self->extra->length) < 0)
708 goto error;
709
710 for (i = 0; i < self->extra->length; i++) {
711 PyObject* child = deepcopy(self->extra->children[i], memo);
712 if (!child) {
713 element->extra->length = i;
714 goto error;
715 }
716 element->extra->children[i] = child;
717 }
718
719 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100720
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000721 }
722
723 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000724 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000725 if (!id)
726 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000727
728 i = PyDict_SetItem(memo, id, (PyObject*) element);
729
730 Py_DECREF(id);
731
732 if (i < 0)
733 goto error;
734
735 return (PyObject*) element;
736
737 error:
738 Py_DECREF(element);
739 return NULL;
740}
741
742LOCAL(int)
743checkpath(PyObject* tag)
744{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000745 Py_ssize_t i;
746 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000747
748 /* check if a tag contains an xpath character */
749
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000750#define PATHCHAR(ch) \
751 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200754 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
755 void *data = PyUnicode_DATA(tag);
756 unsigned int kind = PyUnicode_KIND(tag);
757 for (i = 0; i < len; i++) {
758 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
759 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000760 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200761 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200763 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 return 1;
765 }
766 return 0;
767 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000768 if (PyBytes_Check(tag)) {
769 char *p = PyBytes_AS_STRING(tag);
770 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 if (p[i] == '{')
772 check = 0;
773 else if (p[i] == '}')
774 check = 1;
775 else if (check && PATHCHAR(p[i]))
776 return 1;
777 }
778 return 0;
779 }
780
781 return 1; /* unknown type; might be path expression */
782}
783
784static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000785element_extend(ElementObject* self, PyObject* args)
786{
787 PyObject* seq;
788 Py_ssize_t i, seqlen = 0;
789
790 PyObject* seq_in;
791 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
792 return NULL;
793
794 seq = PySequence_Fast(seq_in, "");
795 if (!seq) {
796 PyErr_Format(
797 PyExc_TypeError,
798 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
799 );
800 return NULL;
801 }
802
803 seqlen = PySequence_Size(seq);
804 for (i = 0; i < seqlen; i++) {
805 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
806 if (element_add_subelement(self, element) < 0) {
807 Py_DECREF(seq);
808 return NULL;
809 }
810 }
811
812 Py_DECREF(seq);
813
814 Py_RETURN_NONE;
815}
816
817static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000818element_find(ElementObject* self, PyObject* args)
819{
820 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000821 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000822 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200823
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000824 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000825 return NULL;
826
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200827 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200828 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200829 return _PyObject_CallMethodId(
830 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000831 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200832 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000833
834 if (!self->extra)
835 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100836
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000837 for (i = 0; i < self->extra->length; i++) {
838 PyObject* item = self->extra->children[i];
839 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000840 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000841 Py_INCREF(item);
842 return item;
843 }
844 }
845
846 Py_RETURN_NONE;
847}
848
849static PyObject*
850element_findtext(ElementObject* self, PyObject* args)
851{
852 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000853 PyObject* tag;
854 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000855 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200856 _Py_IDENTIFIER(findtext);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200857
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000858 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000859 return NULL;
860
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000861 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200862 return _PyObject_CallMethodId(
863 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000864 );
865
866 if (!self->extra) {
867 Py_INCREF(default_value);
868 return default_value;
869 }
870
871 for (i = 0; i < self->extra->length; i++) {
872 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000873 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
874
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000875 PyObject* text = element_get_text(item);
876 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000877 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000878 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000879 return text;
880 }
881 }
882
883 Py_INCREF(default_value);
884 return default_value;
885}
886
887static PyObject*
888element_findall(ElementObject* self, PyObject* args)
889{
890 int i;
891 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000892 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000893 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200894
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000895 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000896 return NULL;
897
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200898 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200899 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200900 return _PyObject_CallMethodId(
901 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000902 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200903 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000904
905 out = PyList_New(0);
906 if (!out)
907 return NULL;
908
909 if (!self->extra)
910 return out;
911
912 for (i = 0; i < self->extra->length; i++) {
913 PyObject* item = self->extra->children[i];
914 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000915 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000916 if (PyList_Append(out, item) < 0) {
917 Py_DECREF(out);
918 return NULL;
919 }
920 }
921 }
922
923 return out;
924}
925
926static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000927element_iterfind(ElementObject* self, PyObject* args)
928{
929 PyObject* tag;
930 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200931 _Py_IDENTIFIER(iterfind);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200932
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000933 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
934 return NULL;
935
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200936 return _PyObject_CallMethodId(
937 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000938 );
939}
940
941static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000942element_get(ElementObject* self, PyObject* args)
943{
944 PyObject* value;
945
946 PyObject* key;
947 PyObject* default_value = Py_None;
948 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
949 return NULL;
950
951 if (!self->extra || self->extra->attrib == Py_None)
952 value = default_value;
953 else {
954 value = PyDict_GetItem(self->extra->attrib, key);
955 if (!value)
956 value = default_value;
957 }
958
959 Py_INCREF(value);
960 return value;
961}
962
963static PyObject*
964element_getchildren(ElementObject* self, PyObject* args)
965{
966 int i;
967 PyObject* list;
968
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000969 /* FIXME: report as deprecated? */
970
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000971 if (!PyArg_ParseTuple(args, ":getchildren"))
972 return NULL;
973
974 if (!self->extra)
975 return PyList_New(0);
976
977 list = PyList_New(self->extra->length);
978 if (!list)
979 return NULL;
980
981 for (i = 0; i < self->extra->length; i++) {
982 PyObject* item = self->extra->children[i];
983 Py_INCREF(item);
984 PyList_SET_ITEM(list, i, item);
985 }
986
987 return list;
988}
989
990static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000991element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000992{
993 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100994
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000995 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000996 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000997 return NULL;
998
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000999 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001000 PyErr_SetString(
1001 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001002 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001003 );
1004 return NULL;
1005 }
1006
1007 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001008 if (!args)
1009 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001010
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001011 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1012 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1013
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001014 result = PyObject_CallObject(elementtree_iter_obj, args);
1015
1016 Py_DECREF(args);
1017
1018 return result;
1019}
1020
1021
1022static PyObject*
1023element_itertext(ElementObject* self, PyObject* args)
1024{
1025 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001026
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001027 if (!PyArg_ParseTuple(args, ":itertext"))
1028 return NULL;
1029
1030 if (!elementtree_itertext_obj) {
1031 PyErr_SetString(
1032 PyExc_RuntimeError,
1033 "itertext helper not found"
1034 );
1035 return NULL;
1036 }
1037
1038 args = PyTuple_New(1);
1039 if (!args)
1040 return NULL;
1041
1042 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1043
1044 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001045
1046 Py_DECREF(args);
1047
1048 return result;
1049}
1050
1051static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001052element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001053{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001054 ElementObject* self = (ElementObject*) self_;
1055
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001056 if (!self->extra || index < 0 || index >= self->extra->length) {
1057 PyErr_SetString(
1058 PyExc_IndexError,
1059 "child index out of range"
1060 );
1061 return NULL;
1062 }
1063
1064 Py_INCREF(self->extra->children[index]);
1065 return self->extra->children[index];
1066}
1067
1068static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001069element_insert(ElementObject* self, PyObject* args)
1070{
1071 int i;
1072
1073 int index;
1074 PyObject* element;
1075 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1076 &Element_Type, &element))
1077 return NULL;
1078
1079 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001080 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001082 if (index < 0) {
1083 index += self->extra->length;
1084 if (index < 0)
1085 index = 0;
1086 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001087 if (index > self->extra->length)
1088 index = self->extra->length;
1089
1090 if (element_resize(self, 1) < 0)
1091 return NULL;
1092
1093 for (i = self->extra->length; i > index; i--)
1094 self->extra->children[i] = self->extra->children[i-1];
1095
1096 Py_INCREF(element);
1097 self->extra->children[index] = element;
1098
1099 self->extra->length++;
1100
1101 Py_RETURN_NONE;
1102}
1103
1104static PyObject*
1105element_items(ElementObject* self, PyObject* args)
1106{
1107 if (!PyArg_ParseTuple(args, ":items"))
1108 return NULL;
1109
1110 if (!self->extra || self->extra->attrib == Py_None)
1111 return PyList_New(0);
1112
1113 return PyDict_Items(self->extra->attrib);
1114}
1115
1116static PyObject*
1117element_keys(ElementObject* self, PyObject* args)
1118{
1119 if (!PyArg_ParseTuple(args, ":keys"))
1120 return NULL;
1121
1122 if (!self->extra || self->extra->attrib == Py_None)
1123 return PyList_New(0);
1124
1125 return PyDict_Keys(self->extra->attrib);
1126}
1127
Martin v. Löwis18e16552006-02-15 17:27:45 +00001128static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001129element_length(ElementObject* self)
1130{
1131 if (!self->extra)
1132 return 0;
1133
1134 return self->extra->length;
1135}
1136
1137static PyObject*
1138element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1139{
1140 PyObject* elem;
1141
1142 PyObject* tag;
1143 PyObject* attrib;
1144 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1145 return NULL;
1146
1147 attrib = PyDict_Copy(attrib);
1148 if (!attrib)
1149 return NULL;
1150
Eli Bendersky092af1f2012-03-04 07:14:03 +02001151 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001152
1153 Py_DECREF(attrib);
1154
1155 return elem;
1156}
1157
1158static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001159element_remove(ElementObject* self, PyObject* args)
1160{
1161 int i;
1162
1163 PyObject* element;
1164 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1165 return NULL;
1166
1167 if (!self->extra) {
1168 /* element has no children, so raise exception */
1169 PyErr_SetString(
1170 PyExc_ValueError,
1171 "list.remove(x): x not in list"
1172 );
1173 return NULL;
1174 }
1175
1176 for (i = 0; i < self->extra->length; i++) {
1177 if (self->extra->children[i] == element)
1178 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001179 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180 break;
1181 }
1182
1183 if (i == self->extra->length) {
1184 /* element is not in children, so raise exception */
1185 PyErr_SetString(
1186 PyExc_ValueError,
1187 "list.remove(x): x not in list"
1188 );
1189 return NULL;
1190 }
1191
1192 Py_DECREF(self->extra->children[i]);
1193
1194 self->extra->length--;
1195
1196 for (; i < self->extra->length; i++)
1197 self->extra->children[i] = self->extra->children[i+1];
1198
1199 Py_RETURN_NONE;
1200}
1201
1202static PyObject*
1203element_repr(ElementObject* self)
1204{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001205 if (self->tag)
1206 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1207 else
1208 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001209}
1210
1211static PyObject*
1212element_set(ElementObject* self, PyObject* args)
1213{
1214 PyObject* attrib;
1215
1216 PyObject* key;
1217 PyObject* value;
1218 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1219 return NULL;
1220
1221 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001222 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001223
1224 attrib = element_get_attrib(self);
1225 if (!attrib)
1226 return NULL;
1227
1228 if (PyDict_SetItem(attrib, key, value) < 0)
1229 return NULL;
1230
1231 Py_RETURN_NONE;
1232}
1233
1234static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001235element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001236{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001237 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001238 int i;
1239 PyObject* old;
1240
1241 if (!self->extra || index < 0 || index >= self->extra->length) {
1242 PyErr_SetString(
1243 PyExc_IndexError,
1244 "child assignment index out of range");
1245 return -1;
1246 }
1247
1248 old = self->extra->children[index];
1249
1250 if (item) {
1251 Py_INCREF(item);
1252 self->extra->children[index] = item;
1253 } else {
1254 self->extra->length--;
1255 for (i = index; i < self->extra->length; i++)
1256 self->extra->children[i] = self->extra->children[i+1];
1257 }
1258
1259 Py_DECREF(old);
1260
1261 return 0;
1262}
1263
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001264static PyObject*
1265element_subscr(PyObject* self_, PyObject* item)
1266{
1267 ElementObject* self = (ElementObject*) self_;
1268
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001269 if (PyIndex_Check(item)) {
1270 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001271
1272 if (i == -1 && PyErr_Occurred()) {
1273 return NULL;
1274 }
1275 if (i < 0 && self->extra)
1276 i += self->extra->length;
1277 return element_getitem(self_, i);
1278 }
1279 else if (PySlice_Check(item)) {
1280 Py_ssize_t start, stop, step, slicelen, cur, i;
1281 PyObject* list;
1282
1283 if (!self->extra)
1284 return PyList_New(0);
1285
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001286 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001287 self->extra->length,
1288 &start, &stop, &step, &slicelen) < 0) {
1289 return NULL;
1290 }
1291
1292 if (slicelen <= 0)
1293 return PyList_New(0);
1294 else {
1295 list = PyList_New(slicelen);
1296 if (!list)
1297 return NULL;
1298
1299 for (cur = start, i = 0; i < slicelen;
1300 cur += step, i++) {
1301 PyObject* item = self->extra->children[cur];
1302 Py_INCREF(item);
1303 PyList_SET_ITEM(list, i, item);
1304 }
1305
1306 return list;
1307 }
1308 }
1309 else {
1310 PyErr_SetString(PyExc_TypeError,
1311 "element indices must be integers");
1312 return NULL;
1313 }
1314}
1315
1316static int
1317element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1318{
1319 ElementObject* self = (ElementObject*) self_;
1320
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001321 if (PyIndex_Check(item)) {
1322 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001323
1324 if (i == -1 && PyErr_Occurred()) {
1325 return -1;
1326 }
1327 if (i < 0 && self->extra)
1328 i += self->extra->length;
1329 return element_setitem(self_, i, value);
1330 }
1331 else if (PySlice_Check(item)) {
1332 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1333
1334 PyObject* recycle = NULL;
1335 PyObject* seq = NULL;
1336
1337 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001338 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001339
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001340 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001341 self->extra->length,
1342 &start, &stop, &step, &slicelen) < 0) {
1343 return -1;
1344 }
1345
Eli Bendersky865756a2012-03-09 13:38:15 +02001346 if (value == NULL) {
1347 /* Delete slice */
1348 size_t cur;
1349 Py_ssize_t i;
1350
1351 if (slicelen <= 0)
1352 return 0;
1353
1354 /* Since we're deleting, the direction of the range doesn't matter,
1355 * so for simplicity make it always ascending.
1356 */
1357 if (step < 0) {
1358 stop = start + 1;
1359 start = stop + step * (slicelen - 1) - 1;
1360 step = -step;
1361 }
1362
1363 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1364
1365 /* recycle is a list that will contain all the children
1366 * scheduled for removal.
1367 */
1368 if (!(recycle = PyList_New(slicelen))) {
1369 PyErr_NoMemory();
1370 return -1;
1371 }
1372
1373 /* This loop walks over all the children that have to be deleted,
1374 * with cur pointing at them. num_moved is the amount of children
1375 * until the next deleted child that have to be "shifted down" to
1376 * occupy the deleted's places.
1377 * Note that in the ith iteration, shifting is done i+i places down
1378 * because i children were already removed.
1379 */
1380 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1381 /* Compute how many children have to be moved, clipping at the
1382 * list end.
1383 */
1384 Py_ssize_t num_moved = step - 1;
1385 if (cur + step >= (size_t)self->extra->length) {
1386 num_moved = self->extra->length - cur - 1;
1387 }
1388
1389 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1390
1391 memmove(
1392 self->extra->children + cur - i,
1393 self->extra->children + cur + 1,
1394 num_moved * sizeof(PyObject *));
1395 }
1396
1397 /* Leftover "tail" after the last removed child */
1398 cur = start + (size_t)slicelen * step;
1399 if (cur < (size_t)self->extra->length) {
1400 memmove(
1401 self->extra->children + cur - slicelen,
1402 self->extra->children + cur,
1403 (self->extra->length - cur) * sizeof(PyObject *));
1404 }
1405
1406 self->extra->length -= slicelen;
1407
1408 /* Discard the recycle list with all the deleted sub-elements */
1409 Py_XDECREF(recycle);
1410 return 0;
1411 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001412 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001413 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001414 seq = PySequence_Fast(value, "");
1415 if (!seq) {
1416 PyErr_Format(
1417 PyExc_TypeError,
1418 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1419 );
1420 return -1;
1421 }
1422 newlen = PySequence_Size(seq);
1423 }
1424
1425 if (step != 1 && newlen != slicelen)
1426 {
1427 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001428 "attempt to assign sequence of size %zd "
1429 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001430 newlen, slicelen
1431 );
1432 return -1;
1433 }
1434
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001435 /* Resize before creating the recycle bin, to prevent refleaks. */
1436 if (newlen > slicelen) {
1437 if (element_resize(self, newlen - slicelen) < 0) {
1438 if (seq) {
1439 Py_DECREF(seq);
1440 }
1441 return -1;
1442 }
1443 }
1444
1445 if (slicelen > 0) {
1446 /* to avoid recursive calls to this method (via decref), move
1447 old items to the recycle bin here, and get rid of them when
1448 we're done modifying the element */
1449 recycle = PyList_New(slicelen);
1450 if (!recycle) {
1451 if (seq) {
1452 Py_DECREF(seq);
1453 }
1454 return -1;
1455 }
1456 for (cur = start, i = 0; i < slicelen;
1457 cur += step, i++)
1458 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1459 }
1460
1461 if (newlen < slicelen) {
1462 /* delete slice */
1463 for (i = stop; i < self->extra->length; i++)
1464 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1465 } else if (newlen > slicelen) {
1466 /* insert slice */
1467 for (i = self->extra->length-1; i >= stop; i--)
1468 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1469 }
1470
1471 /* replace the slice */
1472 for (cur = start, i = 0; i < newlen;
1473 cur += step, i++) {
1474 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1475 Py_INCREF(element);
1476 self->extra->children[cur] = element;
1477 }
1478
1479 self->extra->length += newlen - slicelen;
1480
1481 if (seq) {
1482 Py_DECREF(seq);
1483 }
1484
1485 /* discard the recycle bin, and everything in it */
1486 Py_XDECREF(recycle);
1487
1488 return 0;
1489 }
1490 else {
1491 PyErr_SetString(PyExc_TypeError,
1492 "element indices must be integers");
1493 return -1;
1494 }
1495}
1496
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001497static PyMethodDef element_methods[] = {
1498
1499 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1500
1501 {"get", (PyCFunction) element_get, METH_VARARGS},
1502 {"set", (PyCFunction) element_set, METH_VARARGS},
1503
1504 {"find", (PyCFunction) element_find, METH_VARARGS},
1505 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1506 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1507
1508 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001509 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001510 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1511 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1512
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001513 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1514 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1515 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1516
1517 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001518 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1519
1520 {"items", (PyCFunction) element_items, METH_VARARGS},
1521 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1522
1523 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1524
1525 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1526 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1527
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001528 {NULL, NULL}
1529};
1530
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001531static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001532element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001533{
1534 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001535 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001536
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001537 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001538 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001539
Alexander Belopolskye239d232010-12-08 23:31:48 +00001540 if (name == NULL)
1541 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001542
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001543 /* handle common attributes first */
1544 if (strcmp(name, "tag") == 0) {
1545 res = self->tag;
1546 Py_INCREF(res);
1547 return res;
1548 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001549 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001550 Py_INCREF(res);
1551 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001552 }
1553
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001554 /* methods */
1555 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1556 if (res)
1557 return res;
1558
1559 /* less common attributes */
1560 if (strcmp(name, "tail") == 0) {
1561 PyErr_Clear();
1562 res = element_get_tail(self);
1563 } else if (strcmp(name, "attrib") == 0) {
1564 PyErr_Clear();
1565 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001566 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001567 res = element_get_attrib(self);
1568 }
1569
1570 if (!res)
1571 return NULL;
1572
1573 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001574 return res;
1575}
1576
1577static int
1578element_setattr(ElementObject* self, const char* name, PyObject* value)
1579{
1580 if (value == NULL) {
1581 PyErr_SetString(
1582 PyExc_AttributeError,
1583 "can't delete element attributes"
1584 );
1585 return -1;
1586 }
1587
1588 if (strcmp(name, "tag") == 0) {
1589 Py_DECREF(self->tag);
1590 self->tag = value;
1591 Py_INCREF(self->tag);
1592 } else if (strcmp(name, "text") == 0) {
1593 Py_DECREF(JOIN_OBJ(self->text));
1594 self->text = value;
1595 Py_INCREF(self->text);
1596 } else if (strcmp(name, "tail") == 0) {
1597 Py_DECREF(JOIN_OBJ(self->tail));
1598 self->tail = value;
1599 Py_INCREF(self->tail);
1600 } else if (strcmp(name, "attrib") == 0) {
1601 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001602 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001603 Py_DECREF(self->extra->attrib);
1604 self->extra->attrib = value;
1605 Py_INCREF(self->extra->attrib);
1606 } else {
1607 PyErr_SetString(PyExc_AttributeError, name);
1608 return -1;
1609 }
1610
1611 return 0;
1612}
1613
1614static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001615 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001616 0, /* sq_concat */
1617 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001618 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001619 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001620 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001621 0,
1622};
1623
1624static PyMappingMethods element_as_mapping = {
1625 (lenfunc) element_length,
1626 (binaryfunc) element_subscr,
1627 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001628};
1629
Neal Norwitz227b5332006-03-22 09:28:35 +00001630static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001631 PyVarObject_HEAD_INIT(NULL, 0)
1632 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001633 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001634 (destructor)element_dealloc, /* tp_dealloc */
1635 0, /* tp_print */
1636 0, /* tp_getattr */
1637 (setattrfunc)element_setattr, /* tp_setattr */
1638 0, /* tp_reserved */
1639 (reprfunc)element_repr, /* tp_repr */
1640 0, /* tp_as_number */
1641 &element_as_sequence, /* tp_as_sequence */
1642 &element_as_mapping, /* tp_as_mapping */
1643 0, /* tp_hash */
1644 0, /* tp_call */
1645 0, /* tp_str */
1646 (getattrofunc)element_getattro, /* tp_getattro */
1647 0, /* tp_setattro */
1648 0, /* tp_as_buffer */
1649 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1650 0, /* tp_doc */
1651 0, /* tp_traverse */
1652 0, /* tp_clear */
1653 0, /* tp_richcompare */
1654 0, /* tp_weaklistoffset */
1655 0, /* tp_iter */
1656 0, /* tp_iternext */
1657 element_methods, /* tp_methods */
1658 0, /* tp_members */
1659 0, /* tp_getset */
1660 0, /* tp_base */
1661 0, /* tp_dict */
1662 0, /* tp_descr_get */
1663 0, /* tp_descr_set */
1664 0, /* tp_dictoffset */
1665 (initproc)element_init, /* tp_init */
1666 PyType_GenericAlloc, /* tp_alloc */
1667 element_new, /* tp_new */
1668 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001669};
1670
1671/* ==================================================================== */
1672/* the tree builder type */
1673
1674typedef struct {
1675 PyObject_HEAD
1676
1677 PyObject* root; /* root node (first created node) */
1678
1679 ElementObject* this; /* current node */
1680 ElementObject* last; /* most recently created node */
1681
1682 PyObject* data; /* data collector (string or list), or NULL */
1683
1684 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001685 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001686
1687 /* element tracing */
1688 PyObject* events; /* list of events, or NULL if not collecting */
1689 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1690 PyObject* end_event_obj;
1691 PyObject* start_ns_event_obj;
1692 PyObject* end_ns_event_obj;
1693
1694} TreeBuilderObject;
1695
Neal Norwitz227b5332006-03-22 09:28:35 +00001696static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001697
Christian Heimes90aa7642007-12-19 02:45:37 +00001698#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001699
1700/* -------------------------------------------------------------------- */
1701/* constructor and destructor */
1702
1703LOCAL(PyObject*)
1704treebuilder_new(void)
1705{
1706 TreeBuilderObject* self;
1707
1708 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1709 if (self == NULL)
1710 return NULL;
1711
1712 self->root = NULL;
1713
1714 Py_INCREF(Py_None);
1715 self->this = (ElementObject*) Py_None;
1716
1717 Py_INCREF(Py_None);
1718 self->last = (ElementObject*) Py_None;
1719
1720 self->data = NULL;
1721
1722 self->stack = PyList_New(20);
1723 self->index = 0;
1724
1725 self->events = NULL;
1726 self->start_event_obj = self->end_event_obj = NULL;
1727 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1728
1729 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1730
1731 return (PyObject*) self;
1732}
1733
1734static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001735treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001736{
1737 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1738 return NULL;
1739
1740 return treebuilder_new();
1741}
1742
1743static void
1744treebuilder_dealloc(TreeBuilderObject* self)
1745{
1746 Py_XDECREF(self->end_ns_event_obj);
1747 Py_XDECREF(self->start_ns_event_obj);
1748 Py_XDECREF(self->end_event_obj);
1749 Py_XDECREF(self->start_event_obj);
1750 Py_XDECREF(self->events);
1751 Py_DECREF(self->stack);
1752 Py_XDECREF(self->data);
1753 Py_DECREF(self->last);
1754 Py_DECREF(self->this);
1755 Py_XDECREF(self->root);
1756
1757 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1758
1759 PyObject_Del(self);
1760}
1761
1762/* -------------------------------------------------------------------- */
1763/* handlers */
1764
1765LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001766treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1767 PyObject* attrib)
1768{
1769 PyObject* node;
1770 PyObject* this;
1771
1772 if (self->data) {
1773 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001774 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001775 self->last->text = JOIN_SET(
1776 self->data, PyList_CheckExact(self->data)
1777 );
1778 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001779 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001780 self->last->tail = JOIN_SET(
1781 self->data, PyList_CheckExact(self->data)
1782 );
1783 }
1784 self->data = NULL;
1785 }
1786
Eli Bendersky092af1f2012-03-04 07:14:03 +02001787 node = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001788 if (!node)
1789 return NULL;
1790
1791 this = (PyObject*) self->this;
1792
1793 if (this != Py_None) {
1794 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001795 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001796 } else {
1797 if (self->root) {
1798 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001799 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001800 "multiple elements on top level"
1801 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001802 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001803 }
1804 Py_INCREF(node);
1805 self->root = node;
1806 }
1807
1808 if (self->index < PyList_GET_SIZE(self->stack)) {
1809 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001810 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001811 Py_INCREF(this);
1812 } else {
1813 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001814 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001815 }
1816 self->index++;
1817
1818 Py_DECREF(this);
1819 Py_INCREF(node);
1820 self->this = (ElementObject*) node;
1821
1822 Py_DECREF(self->last);
1823 Py_INCREF(node);
1824 self->last = (ElementObject*) node;
1825
1826 if (self->start_event_obj) {
1827 PyObject* res;
1828 PyObject* action = self->start_event_obj;
1829 res = PyTuple_New(2);
1830 if (res) {
1831 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1832 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1833 PyList_Append(self->events, res);
1834 Py_DECREF(res);
1835 } else
1836 PyErr_Clear(); /* FIXME: propagate error */
1837 }
1838
1839 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001840
1841 error:
1842 Py_DECREF(node);
1843 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001844}
1845
1846LOCAL(PyObject*)
1847treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1848{
1849 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001850 if (self->last == (ElementObject*) Py_None) {
1851 /* ignore calls to data before the first call to start */
1852 Py_RETURN_NONE;
1853 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001854 /* store the first item as is */
1855 Py_INCREF(data); self->data = data;
1856 } else {
1857 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001858 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1859 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001860 /* expat often generates single character data sections; handle
1861 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001862 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1863 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001864 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001865 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001866 } else if (PyList_CheckExact(self->data)) {
1867 if (PyList_Append(self->data, data) < 0)
1868 return NULL;
1869 } else {
1870 PyObject* list = PyList_New(2);
1871 if (!list)
1872 return NULL;
1873 PyList_SET_ITEM(list, 0, self->data);
1874 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1875 self->data = list;
1876 }
1877 }
1878
1879 Py_RETURN_NONE;
1880}
1881
1882LOCAL(PyObject*)
1883treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1884{
1885 PyObject* item;
1886
1887 if (self->data) {
1888 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001889 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001890 self->last->text = JOIN_SET(
1891 self->data, PyList_CheckExact(self->data)
1892 );
1893 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001894 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001895 self->last->tail = JOIN_SET(
1896 self->data, PyList_CheckExact(self->data)
1897 );
1898 }
1899 self->data = NULL;
1900 }
1901
1902 if (self->index == 0) {
1903 PyErr_SetString(
1904 PyExc_IndexError,
1905 "pop from empty stack"
1906 );
1907 return NULL;
1908 }
1909
1910 self->index--;
1911
1912 item = PyList_GET_ITEM(self->stack, self->index);
1913 Py_INCREF(item);
1914
1915 Py_DECREF(self->last);
1916
1917 self->last = (ElementObject*) self->this;
1918 self->this = (ElementObject*) item;
1919
1920 if (self->end_event_obj) {
1921 PyObject* res;
1922 PyObject* action = self->end_event_obj;
1923 PyObject* node = (PyObject*) self->last;
1924 res = PyTuple_New(2);
1925 if (res) {
1926 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1927 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1928 PyList_Append(self->events, res);
1929 Py_DECREF(res);
1930 } else
1931 PyErr_Clear(); /* FIXME: propagate error */
1932 }
1933
1934 Py_INCREF(self->last);
1935 return (PyObject*) self->last;
1936}
1937
1938LOCAL(void)
1939treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001940 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001941{
1942 PyObject* res;
1943 PyObject* action;
1944 PyObject* parcel;
1945
1946 if (!self->events)
1947 return;
1948
1949 if (start) {
1950 if (!self->start_ns_event_obj)
1951 return;
1952 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001953 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001954 if (!parcel)
1955 return;
1956 Py_INCREF(action);
1957 } else {
1958 if (!self->end_ns_event_obj)
1959 return;
1960 action = self->end_ns_event_obj;
1961 Py_INCREF(action);
1962 parcel = Py_None;
1963 Py_INCREF(parcel);
1964 }
1965
1966 res = PyTuple_New(2);
1967
1968 if (res) {
1969 PyTuple_SET_ITEM(res, 0, action);
1970 PyTuple_SET_ITEM(res, 1, parcel);
1971 PyList_Append(self->events, res);
1972 Py_DECREF(res);
1973 } else
1974 PyErr_Clear(); /* FIXME: propagate error */
1975}
1976
1977/* -------------------------------------------------------------------- */
1978/* methods (in alphabetical order) */
1979
1980static PyObject*
1981treebuilder_data(TreeBuilderObject* self, PyObject* args)
1982{
1983 PyObject* data;
1984 if (!PyArg_ParseTuple(args, "O:data", &data))
1985 return NULL;
1986
1987 return treebuilder_handle_data(self, data);
1988}
1989
1990static PyObject*
1991treebuilder_end(TreeBuilderObject* self, PyObject* args)
1992{
1993 PyObject* tag;
1994 if (!PyArg_ParseTuple(args, "O:end", &tag))
1995 return NULL;
1996
1997 return treebuilder_handle_end(self, tag);
1998}
1999
2000LOCAL(PyObject*)
2001treebuilder_done(TreeBuilderObject* self)
2002{
2003 PyObject* res;
2004
2005 /* FIXME: check stack size? */
2006
2007 if (self->root)
2008 res = self->root;
2009 else
2010 res = Py_None;
2011
2012 Py_INCREF(res);
2013 return res;
2014}
2015
2016static PyObject*
2017treebuilder_close(TreeBuilderObject* self, PyObject* args)
2018{
2019 if (!PyArg_ParseTuple(args, ":close"))
2020 return NULL;
2021
2022 return treebuilder_done(self);
2023}
2024
2025static PyObject*
2026treebuilder_start(TreeBuilderObject* self, PyObject* args)
2027{
2028 PyObject* tag;
2029 PyObject* attrib = Py_None;
2030 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2031 return NULL;
2032
2033 return treebuilder_handle_start(self, tag, attrib);
2034}
2035
/* Method table installed as tp_methods of TreeBuilder_Type.  Every
   entry receives its arguments as a positional tuple (METH_VARARGS). */
static PyMethodDef treebuilder_methods[] = {
    {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
    {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
    {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
    {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
    {NULL, NULL} /* sentinel: marks the end of the table */
};
2043
/* Type object for TreeBuilder instances.  The initializer is positional,
   so the slot order below must not be changed.  Only a destructor and a
   method table are provided; all other slots are left at 0. */
static PyTypeObject TreeBuilder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "TreeBuilder", sizeof(TreeBuilderObject), 0,
    /* methods */
    (destructor)treebuilder_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_reserved */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT, /* tp_flags */
    0, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    treebuilder_methods, /* tp_methods */
    0, /* tp_members */
};
2074
2075/* ==================================================================== */
2076/* the expat interface */
2077
2078#if defined(USE_EXPAT)
2079
2080#include "expat.h"
2081
2082#if defined(USE_PYEXPAT_CAPI)
2083#include "pyexpat.h"
2084static struct PyExpat_CAPI* expat_capi;
2085#define EXPAT(func) (expat_capi->func)
2086#else
2087#define EXPAT(func) (XML_##func)
2088#endif
2089
/* Instance layout of XMLParser objects. */
typedef struct {
    PyObject_HEAD

    XML_Parser parser; /* expat parser handle */

    PyObject* target; /* object that receives the parse events */
    PyObject* entity; /* dict the default handler uses to look up entity names */

    PyObject* names; /* dict used by makeuniversal() to cache converted names */

    /* attributes fetched by name from target when the parser is created
       ("start", "data", "end", "comment", "pi", "close"); a member is
       NULL when the target has no such attribute */

    PyObject* handle_start;
    PyObject* handle_data;
    PyObject* handle_end;

    PyObject* handle_comment;
    PyObject* handle_pi;

    PyObject* handle_close;

} XMLParserObject;
2110
Neal Norwitz227b5332006-03-22 09:28:35 +00002111static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002112
2113/* helpers */
2114
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002115LOCAL(PyObject*)
2116makeuniversal(XMLParserObject* self, const char* string)
2117{
2118 /* convert a UTF-8 tag/attribute name from the expat parser
2119 to a universal name string */
2120
2121 int size = strlen(string);
2122 PyObject* key;
2123 PyObject* value;
2124
2125 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002126 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002127 if (!key)
2128 return NULL;
2129
2130 value = PyDict_GetItem(self->names, key);
2131
2132 if (value) {
2133 Py_INCREF(value);
2134 } else {
2135 /* new name. convert to universal name, and decode as
2136 necessary */
2137
2138 PyObject* tag;
2139 char* p;
2140 int i;
2141
2142 /* look for namespace separator */
2143 for (i = 0; i < size; i++)
2144 if (string[i] == '}')
2145 break;
2146 if (i != size) {
2147 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002148 tag = PyBytes_FromStringAndSize(NULL, size+1);
2149 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002150 p[0] = '{';
2151 memcpy(p+1, string, size);
2152 size++;
2153 } else {
2154 /* plain name; use key as tag */
2155 Py_INCREF(key);
2156 tag = key;
2157 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002158
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002159 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002160 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002161 value = PyUnicode_DecodeUTF8(p, size, "strict");
2162 Py_DECREF(tag);
2163 if (!value) {
2164 Py_DECREF(key);
2165 return NULL;
2166 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002167
2168 /* add to names dictionary */
2169 if (PyDict_SetItem(self->names, key, value) < 0) {
2170 Py_DECREF(key);
2171 Py_DECREF(value);
2172 return NULL;
2173 }
2174 }
2175
2176 Py_DECREF(key);
2177 return value;
2178}
2179
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002180static void
2181expat_set_error(const char* message, int line, int column)
2182{
Victor Stinner499dfcf2011-03-21 13:26:24 +01002183 PyObject *errmsg, *error, *position;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002184
Victor Stinner499dfcf2011-03-21 13:26:24 +01002185 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
2186 message, line, column);
2187 if (errmsg == NULL)
2188 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002189
Victor Stinner499dfcf2011-03-21 13:26:24 +01002190 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2191 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002192 if (!error)
2193 return;
2194
2195 /* add position attribute */
2196 position = Py_BuildValue("(ii)", line, column);
2197 if (!position) {
2198 Py_DECREF(error);
2199 return;
2200 }
2201 if (PyObject_SetAttrString(error, "position", position) == -1) {
2202 Py_DECREF(error);
2203 Py_DECREF(position);
2204 return;
2205 }
2206 Py_DECREF(position);
2207
2208 PyErr_SetObject(elementtree_parseerror_obj, error);
2209 Py_DECREF(error);
2210}
2211
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002212/* -------------------------------------------------------------------- */
2213/* handlers */
2214
2215static void
2216expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2217 int data_len)
2218{
2219 PyObject* key;
2220 PyObject* value;
2221 PyObject* res;
2222
2223 if (data_len < 2 || data_in[0] != '&')
2224 return;
2225
Neal Norwitz0269b912007-08-08 06:56:02 +00002226 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002227 if (!key)
2228 return;
2229
2230 value = PyDict_GetItem(self->entity, key);
2231
2232 if (value) {
2233 if (TreeBuilder_CheckExact(self->target))
2234 res = treebuilder_handle_data(
2235 (TreeBuilderObject*) self->target, value
2236 );
2237 else if (self->handle_data)
2238 res = PyObject_CallFunction(self->handle_data, "O", value);
2239 else
2240 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002241 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002242 } else if (!PyErr_Occurred()) {
2243 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002244 char message[128] = "undefined entity ";
2245 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002246 expat_set_error(
2247 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002248 EXPAT(GetErrorLineNumber)(self->parser),
2249 EXPAT(GetErrorColumnNumber)(self->parser)
2250 );
2251 }
2252
2253 Py_DECREF(key);
2254}
2255
2256static void
2257expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2258 const XML_Char **attrib_in)
2259{
2260 PyObject* res;
2261 PyObject* tag;
2262 PyObject* attrib;
2263 int ok;
2264
2265 /* tag name */
2266 tag = makeuniversal(self, tag_in);
2267 if (!tag)
2268 return; /* parser will look for errors */
2269
2270 /* attributes */
2271 if (attrib_in[0]) {
2272 attrib = PyDict_New();
2273 if (!attrib)
2274 return;
2275 while (attrib_in[0] && attrib_in[1]) {
2276 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002277 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002278 if (!key || !value) {
2279 Py_XDECREF(value);
2280 Py_XDECREF(key);
2281 Py_DECREF(attrib);
2282 return;
2283 }
2284 ok = PyDict_SetItem(attrib, key, value);
2285 Py_DECREF(value);
2286 Py_DECREF(key);
2287 if (ok < 0) {
2288 Py_DECREF(attrib);
2289 return;
2290 }
2291 attrib_in += 2;
2292 }
2293 } else {
2294 Py_INCREF(Py_None);
2295 attrib = Py_None;
2296 }
2297
2298 if (TreeBuilder_CheckExact(self->target))
2299 /* shortcut */
2300 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2301 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002302 else if (self->handle_start) {
2303 if (attrib == Py_None) {
2304 Py_DECREF(attrib);
2305 attrib = PyDict_New();
2306 if (!attrib)
2307 return;
2308 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002309 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002310 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002311 res = NULL;
2312
2313 Py_DECREF(tag);
2314 Py_DECREF(attrib);
2315
2316 Py_XDECREF(res);
2317}
2318
2319static void
2320expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2321 int data_len)
2322{
2323 PyObject* data;
2324 PyObject* res;
2325
Neal Norwitz0269b912007-08-08 06:56:02 +00002326 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002327 if (!data)
2328 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002329
2330 if (TreeBuilder_CheckExact(self->target))
2331 /* shortcut */
2332 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2333 else if (self->handle_data)
2334 res = PyObject_CallFunction(self->handle_data, "O", data);
2335 else
2336 res = NULL;
2337
2338 Py_DECREF(data);
2339
2340 Py_XDECREF(res);
2341}
2342
2343static void
2344expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2345{
2346 PyObject* tag;
2347 PyObject* res = NULL;
2348
2349 if (TreeBuilder_CheckExact(self->target))
2350 /* shortcut */
2351 /* the standard tree builder doesn't look at the end tag */
2352 res = treebuilder_handle_end(
2353 (TreeBuilderObject*) self->target, Py_None
2354 );
2355 else if (self->handle_end) {
2356 tag = makeuniversal(self, tag_in);
2357 if (tag) {
2358 res = PyObject_CallFunction(self->handle_end, "O", tag);
2359 Py_DECREF(tag);
2360 }
2361 }
2362
2363 Py_XDECREF(res);
2364}
2365
2366static void
2367expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2368 const XML_Char *uri)
2369{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002370 PyObject* sprefix = NULL;
2371 PyObject* suri = NULL;
2372
2373 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2374 if (!suri)
2375 return;
2376
2377 if (prefix)
2378 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2379 else
2380 sprefix = PyUnicode_FromString("");
2381 if (!sprefix) {
2382 Py_DECREF(suri);
2383 return;
2384 }
2385
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002386 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002387 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002388 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002389
2390 Py_DECREF(sprefix);
2391 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002392}
2393
2394static void
2395expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2396{
2397 treebuilder_handle_namespace(
2398 (TreeBuilderObject*) self->target, 0, NULL, NULL
2399 );
2400}
2401
2402static void
2403expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2404{
2405 PyObject* comment;
2406 PyObject* res;
2407
2408 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002409 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002410 if (comment) {
2411 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2412 Py_XDECREF(res);
2413 Py_DECREF(comment);
2414 }
2415 }
2416}
2417
2418static void
2419expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2420 const XML_Char* data_in)
2421{
2422 PyObject* target;
2423 PyObject* data;
2424 PyObject* res;
2425
2426 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002427 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2428 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002429 if (target && data) {
2430 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2431 Py_XDECREF(res);
2432 Py_DECREF(data);
2433 Py_DECREF(target);
2434 } else {
2435 Py_XDECREF(data);
2436 Py_XDECREF(target);
2437 }
2438 }
2439}
2440
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002441static int
2442expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2443 XML_Encoding *info)
2444{
2445 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002446 unsigned char s[256];
2447 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002448 void *data;
2449 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002450
2451 memset(info, 0, sizeof(XML_Encoding));
2452
2453 for (i = 0; i < 256; i++)
2454 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002455
Fredrik Lundhc3389992005-12-25 11:40:19 +00002456 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002457 if (!u)
2458 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002459 if (PyUnicode_READY(u))
2460 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002461
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002462 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002463 Py_DECREF(u);
2464 return XML_STATUS_ERROR;
2465 }
2466
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002467 kind = PyUnicode_KIND(u);
2468 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002469 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002470 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2471 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2472 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002473 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002474 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002475 }
2476
2477 Py_DECREF(u);
2478
2479 return XML_STATUS_OK;
2480}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002481
2482/* -------------------------------------------------------------------- */
2483/* constructor and destructor */
2484
2485static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002486xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002487{
2488 XMLParserObject* self;
2489 /* FIXME: does this need to be static? */
2490 static XML_Memory_Handling_Suite memory_handler;
2491
2492 PyObject* target = NULL;
2493 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002494 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002495 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2496 &target, &encoding))
2497 return NULL;
2498
2499#if defined(USE_PYEXPAT_CAPI)
2500 if (!expat_capi) {
2501 PyErr_SetString(
2502 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2503 );
2504 return NULL;
2505 }
2506#endif
2507
2508 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2509 if (self == NULL)
2510 return NULL;
2511
2512 self->entity = PyDict_New();
2513 if (!self->entity) {
2514 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002515 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002516 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002517
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002518 self->names = PyDict_New();
2519 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002520 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002522 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002523 }
2524
2525 memory_handler.malloc_fcn = PyObject_Malloc;
2526 memory_handler.realloc_fcn = PyObject_Realloc;
2527 memory_handler.free_fcn = PyObject_Free;
2528
2529 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2530 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002531 PyObject_Del(self->names);
2532 PyObject_Del(self->entity);
2533 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002534 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002535 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002536 }
2537
2538 /* setup target handlers */
2539 if (!target) {
2540 target = treebuilder_new();
2541 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002542 EXPAT(ParserFree)(self->parser);
2543 PyObject_Del(self->names);
2544 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002545 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002546 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002547 }
2548 } else
2549 Py_INCREF(target);
2550 self->target = target;
2551
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002552 self->handle_start = PyObject_GetAttrString(target, "start");
2553 self->handle_data = PyObject_GetAttrString(target, "data");
2554 self->handle_end = PyObject_GetAttrString(target, "end");
2555 self->handle_comment = PyObject_GetAttrString(target, "comment");
2556 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002557 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002558
2559 PyErr_Clear();
2560
2561 /* configure parser */
2562 EXPAT(SetUserData)(self->parser, self);
2563 EXPAT(SetElementHandler)(
2564 self->parser,
2565 (XML_StartElementHandler) expat_start_handler,
2566 (XML_EndElementHandler) expat_end_handler
2567 );
2568 EXPAT(SetDefaultHandlerExpand)(
2569 self->parser,
2570 (XML_DefaultHandler) expat_default_handler
2571 );
2572 EXPAT(SetCharacterDataHandler)(
2573 self->parser,
2574 (XML_CharacterDataHandler) expat_data_handler
2575 );
2576 if (self->handle_comment)
2577 EXPAT(SetCommentHandler)(
2578 self->parser,
2579 (XML_CommentHandler) expat_comment_handler
2580 );
2581 if (self->handle_pi)
2582 EXPAT(SetProcessingInstructionHandler)(
2583 self->parser,
2584 (XML_ProcessingInstructionHandler) expat_pi_handler
2585 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002586 EXPAT(SetUnknownEncodingHandler)(
2587 self->parser,
2588 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2589 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002590
2591 ALLOC(sizeof(XMLParserObject), "create expatparser");
2592
2593 return (PyObject*) self;
2594}
2595
/* Destructor for XMLParser objects: frees the expat parser, drops the
   references held by the object, and releases the object's memory. */
static void
xmlparser_dealloc(XMLParserObject* self)
{
    EXPAT(ParserFree)(self->parser);

    /* the handler members may be NULL (attribute missing on target) */
    Py_XDECREF(self->handle_close);
    Py_XDECREF(self->handle_pi);
    Py_XDECREF(self->handle_comment);
    Py_XDECREF(self->handle_end);
    Py_XDECREF(self->handle_data);
    Py_XDECREF(self->handle_start);

    /* these three are always set by the constructor */
    Py_DECREF(self->target);
    Py_DECREF(self->entity);
    Py_DECREF(self->names);

    RELEASE(sizeof(XMLParserObject), "destroy expatparser");

    PyObject_Del(self);
}
2616
2617/* -------------------------------------------------------------------- */
2618/* methods (in alphabetical order) */
2619
2620LOCAL(PyObject*)
2621expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2622{
2623 int ok;
2624
2625 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2626
2627 if (PyErr_Occurred())
2628 return NULL;
2629
2630 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002631 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002632 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2633 EXPAT(GetErrorLineNumber)(self->parser),
2634 EXPAT(GetErrorColumnNumber)(self->parser)
2635 );
2636 return NULL;
2637 }
2638
2639 Py_RETURN_NONE;
2640}
2641
2642static PyObject*
2643xmlparser_close(XMLParserObject* self, PyObject* args)
2644{
2645 /* end feeding data to parser */
2646
2647 PyObject* res;
2648 if (!PyArg_ParseTuple(args, ":close"))
2649 return NULL;
2650
2651 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002652 if (!res)
2653 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002654
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002655 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002656 Py_DECREF(res);
2657 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002658 } if (self->handle_close) {
2659 Py_DECREF(res);
2660 return PyObject_CallFunction(self->handle_close, "");
2661 } else
2662 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663}
2664
2665static PyObject*
2666xmlparser_feed(XMLParserObject* self, PyObject* args)
2667{
2668 /* feed data to parser */
2669
2670 char* data;
2671 int data_len;
2672 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2673 return NULL;
2674
2675 return expat_parse(self, data, data_len, 0);
2676}
2677
2678static PyObject*
2679xmlparser_parse(XMLParserObject* self, PyObject* args)
2680{
2681 /* (internal) parse until end of input stream */
2682
2683 PyObject* reader;
2684 PyObject* buffer;
2685 PyObject* res;
2686
2687 PyObject* fileobj;
2688 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2689 return NULL;
2690
2691 reader = PyObject_GetAttrString(fileobj, "read");
2692 if (!reader)
2693 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002694
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002695 /* read from open file object */
2696 for (;;) {
2697
2698 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2699
2700 if (!buffer) {
2701 /* read failed (e.g. due to KeyboardInterrupt) */
2702 Py_DECREF(reader);
2703 return NULL;
2704 }
2705
Christian Heimes72b710a2008-05-26 13:28:38 +00002706 if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707 Py_DECREF(buffer);
2708 break;
2709 }
2710
2711 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002712 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002713 );
2714
2715 Py_DECREF(buffer);
2716
2717 if (!res) {
2718 Py_DECREF(reader);
2719 return NULL;
2720 }
2721 Py_DECREF(res);
2722
2723 }
2724
2725 Py_DECREF(reader);
2726
2727 res = expat_parse(self, "", 0, 1);
2728
2729 if (res && TreeBuilder_CheckExact(self->target)) {
2730 Py_DECREF(res);
2731 return treebuilder_done((TreeBuilderObject*) self->target);
2732 }
2733
2734 return res;
2735}
2736
2737static PyObject*
2738xmlparser_setevents(XMLParserObject* self, PyObject* args)
2739{
2740 /* activate element event reporting */
2741
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002742 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002743 TreeBuilderObject* target;
2744
2745 PyObject* events; /* event collector */
2746 PyObject* event_set = Py_None;
2747 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2748 &event_set))
2749 return NULL;
2750
2751 if (!TreeBuilder_CheckExact(self->target)) {
2752 PyErr_SetString(
2753 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01002754 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002755 "targets"
2756 );
2757 return NULL;
2758 }
2759
2760 target = (TreeBuilderObject*) self->target;
2761
2762 Py_INCREF(events);
2763 Py_XDECREF(target->events);
2764 target->events = events;
2765
2766 /* clear out existing events */
2767 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2768 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2769 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2770 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2771
2772 if (event_set == Py_None) {
2773 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002774 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002775 Py_RETURN_NONE;
2776 }
2777
2778 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2779 goto error;
2780
2781 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2782 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2783 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002784 if (PyUnicode_Check(item)) {
2785 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002786 if (event == NULL)
2787 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002788 } else if (PyBytes_Check(item))
2789 event = PyBytes_AS_STRING(item);
2790 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002791 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002792 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002793 if (strcmp(event, "start") == 0) {
2794 Py_INCREF(item);
2795 target->start_event_obj = item;
2796 } else if (strcmp(event, "end") == 0) {
2797 Py_INCREF(item);
2798 Py_XDECREF(target->end_event_obj);
2799 target->end_event_obj = item;
2800 } else if (strcmp(event, "start-ns") == 0) {
2801 Py_INCREF(item);
2802 Py_XDECREF(target->start_ns_event_obj);
2803 target->start_ns_event_obj = item;
2804 EXPAT(SetNamespaceDeclHandler)(
2805 self->parser,
2806 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2807 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2808 );
2809 } else if (strcmp(event, "end-ns") == 0) {
2810 Py_INCREF(item);
2811 Py_XDECREF(target->end_ns_event_obj);
2812 target->end_ns_event_obj = item;
2813 EXPAT(SetNamespaceDeclHandler)(
2814 self->parser,
2815 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2816 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2817 );
2818 } else {
2819 PyErr_Format(
2820 PyExc_ValueError,
2821 "unknown event '%s'", event
2822 );
2823 return NULL;
2824 }
2825 }
2826
2827 Py_RETURN_NONE;
2828
2829 error:
2830 PyErr_SetString(
2831 PyExc_TypeError,
2832 "invalid event tuple"
2833 );
2834 return NULL;
2835}
2836
/* Method table installed as tp_methods of XMLParser_Type.  Every entry
   receives its arguments as a positional tuple (METH_VARARGS). */
static PyMethodDef xmlparser_methods[] = {
    {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
    {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
    {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
    {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
    {NULL, NULL} /* sentinel: marks the end of the table */
};
2844
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002845static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002846xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002847{
Alexander Belopolskye239d232010-12-08 23:31:48 +00002848 if (PyUnicode_Check(nameobj)) {
2849 PyObject* res;
2850 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
2851 res = self->entity;
2852 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
2853 res = self->target;
2854 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
2855 return PyUnicode_FromFormat(
2856 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002857 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00002858 }
2859 else
2860 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002861
Alexander Belopolskye239d232010-12-08 23:31:48 +00002862 Py_INCREF(res);
2863 return res;
2864 }
2865 generic:
2866 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002867}
2868
/* Type object for XMLParser instances.  The initializer is positional,
   so the slot order below must not be changed.  It provides a
   destructor, a custom tp_getattro and a method table; all other slots
   are left at 0. */
static PyTypeObject XMLParser_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "XMLParser", sizeof(XMLParserObject), 0,
    /* methods */
    (destructor)xmlparser_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_reserved */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    (getattrofunc)xmlparser_getattro, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT, /* tp_flags */
    0, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    xmlparser_methods, /* tp_methods */
    0, /* tp_members */
};
2899
2900#endif
2901
2902/* ==================================================================== */
2903/* python module interface */
2904
/* Module-level function table, referenced by _elementtreemodule.
   XMLParser is only included when USE_EXPAT is defined. */
static PyMethodDef _functions[] = {
    {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
    {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
#if defined(USE_EXPAT)
    {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
#endif
    {NULL, NULL} /* sentinel: marks the end of the table */
};
2913
Martin v. Löwis1a214512008-06-11 05:26:20 +00002914
/* Module definition passed to PyModule_Create() in
   PyInit__elementtree(). */
static struct PyModuleDef _elementtreemodule = {
    PyModuleDef_HEAD_INIT,
    "_elementtree", /* m_name */
    NULL, /* m_doc */
    -1, /* m_size */
    _functions, /* m_methods */
    NULL,
    NULL,
    NULL,
    NULL
};
2926
Neal Norwitzf6657e62006-12-28 04:47:50 +00002927PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002928PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002929{
2930 PyObject* m;
2931 PyObject* g;
2932 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002933
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002934 /* Initialize object types */
2935 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002936 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002937 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002938 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002939#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002940 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002941 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002942#endif
2943
Martin v. Löwis1a214512008-06-11 05:26:20 +00002944 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002945 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002946 return NULL;
2947
2948 /* The code below requires that the module gets already added
2949 to sys.modules. */
2950 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002951 _elementtreemodule.m_name,
2952 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002953
2954 /* python glue code */
2955
2956 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002957 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002958 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002959
2960 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2961
2962 bootstrap = (
2963
Florent Xiclunaf4bdf4e2012-02-11 11:28:16 +01002964 "from copy import deepcopy\n"
2965 "from xml.etree import ElementPath\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002966
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002967 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002968 " if tag == '*':\n"
2969 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970 " if tag is None or node.tag == tag:\n"
2971 " yield node\n"
2972 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002973 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002975
2976 "def itertext(node):\n" /* helper */
2977 " if node.text:\n"
2978 " yield node.text\n"
2979 " for e in node:\n"
2980 " for s in e.itertext():\n"
2981 " yield s\n"
2982 " if e.tail:\n"
2983 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002984
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002985 );
2986
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002987 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
2988 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002989
2990 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002991 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002992 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
2993 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002994
2995#if defined(USE_PYEXPAT_CAPI)
2996 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002997 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
2998 if (expat_capi) {
2999 /* check that it's usable */
3000 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3001 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3002 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3003 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3004 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3005 expat_capi = NULL;
3006 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003007#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003009 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003010 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003011 );
3012 Py_INCREF(elementtree_parseerror_obj);
3013 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3014
Eli Bendersky092af1f2012-03-04 07:14:03 +02003015 Py_INCREF((PyObject *)&Element_Type);
3016 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3017
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003018 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003019}