blob: a50a3e7a67b993e7828a64324380463cede9e262 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Keep this defined to build the expat-based XMLParser type. */
#define USE_EXPAT

/* Define this to route every expat call through the expat library
   embedded in pyexpat. */
/* #define USE_PYEXPAT_CAPI */

/* Number of children an element can hold before an extra memory
   allocation is needed for the child array. */
#define STATIC_CHILDREN 4
66
67/* For best performance, chose a value so that 80-90% of all nodes
68 have no more than the given number of children. Set this to zero
69 to minimize the size of the element structure itself (this only
70 helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010073 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000074 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
#if 0
/* debugging aid: track a running byte count and print it on every
   ALLOC/RELEASE */
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
/* regular builds: the accounting hooks expand to nothing */
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif

/* The lowest bit of the text and tail pointers stores a 'join' flag.
   Any use of text or tail as an object pointer has to go through
   JOIN_OBJ, which masks the flag away; JOIN_GET reads the flag and
   JOIN_SET builds a tagged pointer from an object and a flag.  See the
   comments in the ElementObject definition for more info. */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
104
105/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000106static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000107static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000108static PyObject* elementtree_iter_obj;
109static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000110static PyObject* elementpath_obj;
111
112/* helpers */
113
114LOCAL(PyObject*)
115deepcopy(PyObject* object, PyObject* memo)
116{
117 /* do a deep copy of the given object */
118
119 PyObject* args;
120 PyObject* result;
121
122 if (!elementtree_deepcopy_obj) {
123 PyErr_SetString(
124 PyExc_RuntimeError,
125 "deepcopy helper not found"
126 );
127 return NULL;
128 }
129
130 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000131 if (!args)
132 return NULL;
133
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000134 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
135 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
136
137 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
138
139 Py_DECREF(args);
140
141 return result;
142}
143
144LOCAL(PyObject*)
145list_join(PyObject* list)
146{
147 /* join list elements (destroying the list in the process) */
148
149 PyObject* joiner;
150 PyObject* function;
151 PyObject* args;
152 PyObject* result;
153
154 switch (PyList_GET_SIZE(list)) {
155 case 0:
156 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000157 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000158 case 1:
159 result = PyList_GET_ITEM(list, 0);
160 Py_INCREF(result);
161 Py_DECREF(list);
162 return result;
163 }
164
165 /* two or more elements: slice out a suitable separator from the
166 first member, and use that to join the entire list */
167
168 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
169 if (!joiner)
170 return NULL;
171
172 function = PyObject_GetAttrString(joiner, "join");
173 if (!function) {
174 Py_DECREF(joiner);
175 return NULL;
176 }
177
178 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000179 if (!args)
180 return NULL;
181
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000182 PyTuple_SET_ITEM(args, 0, list);
183
184 result = PyObject_CallObject(function, args);
185
186 Py_DECREF(args); /* also removes list */
187 Py_DECREF(function);
188 Py_DECREF(joiner);
189
190 return result;
191}
192
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000193/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200194/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000195
196typedef struct {
197
198 /* attributes (a dictionary object), or None if no attributes */
199 PyObject* attrib;
200
201 /* child elements */
202 int length; /* actual number of items */
203 int allocated; /* allocated items */
204
205 /* this either points to _children or to a malloced buffer */
206 PyObject* *children;
207
208 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100209
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210} ElementObjectExtra;
211
212typedef struct {
213 PyObject_HEAD
214
215 /* element tag (a string). */
216 PyObject* tag;
217
218 /* text before first child. note that this is a tagged pointer;
219 use JOIN_OBJ to get the object pointer. the join flag is used
220 to distinguish lists created by the tree builder from lists
221 assigned to the attribute by application code; the former
222 should be joined before being returned to the user, the latter
223 should be left intact. */
224 PyObject* text;
225
226 /* text after this element, in parent. note that this is a tagged
227 pointer; use JOIN_OBJ to get the object pointer. */
228 PyObject* tail;
229
230 ElementObjectExtra* extra;
231
232} ElementObject;
233
Neal Norwitz227b5332006-03-22 09:28:35 +0000234static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235
Christian Heimes90aa7642007-12-19 02:45:37 +0000236#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000237
238/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200239/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000240
241LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200242create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000243{
244 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
245 if (!self->extra)
246 return -1;
247
248 if (!attrib)
249 attrib = Py_None;
250
251 Py_INCREF(attrib);
252 self->extra->attrib = attrib;
253
254 self->extra->length = 0;
255 self->extra->allocated = STATIC_CHILDREN;
256 self->extra->children = self->extra->_children;
257
258 return 0;
259}
260
261LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200262dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000263{
264 int i;
265
266 Py_DECREF(self->extra->attrib);
267
268 for (i = 0; i < self->extra->length; i++)
269 Py_DECREF(self->extra->children[i]);
270
271 if (self->extra->children != self->extra->_children)
272 PyObject_Free(self->extra->children);
273
274 PyObject_Free(self->extra);
275}
276
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277/* Convenience internal function to create new Element objects with the given
278 * tag and attributes.
279*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200281create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282{
283 ElementObject* self;
284
285 self = PyObject_New(ElementObject, &Element_Type);
286 if (self == NULL)
287 return NULL;
288
289 /* use None for empty dictionaries */
290 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
291 attrib = Py_None;
292
293 self->extra = NULL;
294
295 if (attrib != Py_None) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200296 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000297 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000298 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000299 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000300 }
301
302 Py_INCREF(tag);
303 self->tag = tag;
304
305 Py_INCREF(Py_None);
306 self->text = Py_None;
307
308 Py_INCREF(Py_None);
309 self->tail = Py_None;
310
311 ALLOC(sizeof(ElementObject), "create element");
312
313 return (PyObject*) self;
314}
315
Eli Bendersky092af1f2012-03-04 07:14:03 +0200316static PyObject *
317element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
318{
319 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
320 if (e != NULL) {
321 Py_INCREF(Py_None);
322 e->tag = Py_None;
323
324 Py_INCREF(Py_None);
325 e->text = Py_None;
326
327 Py_INCREF(Py_None);
328 e->tail = Py_None;
329
330 e->extra = NULL;
331 }
332 return (PyObject *)e;
333}
334
335static int
336element_init(PyObject *self, PyObject *args, PyObject *kwds)
337{
338 PyObject *tag;
339 PyObject *tmp;
340 PyObject *attrib = NULL;
341 ElementObject *self_elem;
342
343 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
344 return -1;
345
346 if (attrib || kwds) {
347 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
348 if (!attrib)
349 return -1;
350 if (kwds)
351 PyDict_Update(attrib, kwds);
352 } else {
353 Py_INCREF(Py_None);
354 attrib = Py_None;
355 }
356
357 self_elem = (ElementObject *)self;
358
359 /* Use None for empty dictionaries */
360 if (PyDict_CheckExact(attrib) && PyDict_Size(attrib) == 0) {
361 Py_INCREF(Py_None);
362 attrib = Py_None;
363 }
364
365 if (attrib != Py_None) {
366 if (create_extra(self_elem, attrib) < 0) {
367 PyObject_Del(self_elem);
368 return -1;
369 }
370 }
371
372 /* If create_extra needed attrib, it took a reference to it, so we can
373 * release ours anyway.
374 */
375 Py_DECREF(attrib);
376
377 /* Replace the objects already pointed to by tag, text and tail. */
378 tmp = self_elem->tag;
379 self_elem->tag = tag;
380 Py_INCREF(tag);
381 Py_DECREF(tmp);
382
383 tmp = self_elem->text;
384 self_elem->text = Py_None;
385 Py_INCREF(Py_None);
386 Py_DECREF(JOIN_OBJ(tmp));
387
388 tmp = self_elem->tail;
389 self_elem->tail = Py_None;
390 Py_INCREF(Py_None);
391 Py_DECREF(JOIN_OBJ(tmp));
392
393 return 0;
394}
395
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000396LOCAL(int)
397element_resize(ElementObject* self, int extra)
398{
399 int size;
400 PyObject* *children;
401
402 /* make sure self->children can hold the given number of extra
403 elements. set an exception and return -1 if allocation failed */
404
405 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000407
408 size = self->extra->length + extra;
409
410 if (size > self->extra->allocated) {
411 /* use Python 2.4's list growth strategy */
412 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000413 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100414 * which needs at least 4 bytes.
415 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000416 * be safe.
417 */
418 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000420 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100421 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000422 * false alarm always assume at least one child to be safe.
423 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000424 children = PyObject_Realloc(self->extra->children,
425 size * sizeof(PyObject*));
426 if (!children)
427 goto nomemory;
428 } else {
429 children = PyObject_Malloc(size * sizeof(PyObject*));
430 if (!children)
431 goto nomemory;
432 /* copy existing children from static area to malloc buffer */
433 memcpy(children, self->extra->children,
434 self->extra->length * sizeof(PyObject*));
435 }
436 self->extra->children = children;
437 self->extra->allocated = size;
438 }
439
440 return 0;
441
442 nomemory:
443 PyErr_NoMemory();
444 return -1;
445}
446
447LOCAL(int)
448element_add_subelement(ElementObject* self, PyObject* element)
449{
450 /* add a child element to a parent */
451
452 if (element_resize(self, 1) < 0)
453 return -1;
454
455 Py_INCREF(element);
456 self->extra->children[self->extra->length] = element;
457
458 self->extra->length++;
459
460 return 0;
461}
462
463LOCAL(PyObject*)
464element_get_attrib(ElementObject* self)
465{
466 /* return borrowed reference to attrib dictionary */
467 /* note: this function assumes that the extra section exists */
468
469 PyObject* res = self->extra->attrib;
470
471 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000472 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 /* create missing dictionary */
474 res = PyDict_New();
475 if (!res)
476 return NULL;
477 self->extra->attrib = res;
478 }
479
480 return res;
481}
482
483LOCAL(PyObject*)
484element_get_text(ElementObject* self)
485{
486 /* return borrowed reference to text attribute */
487
488 PyObject* res = self->text;
489
490 if (JOIN_GET(res)) {
491 res = JOIN_OBJ(res);
492 if (PyList_CheckExact(res)) {
493 res = list_join(res);
494 if (!res)
495 return NULL;
496 self->text = res;
497 }
498 }
499
500 return res;
501}
502
503LOCAL(PyObject*)
504element_get_tail(ElementObject* self)
505{
506 /* return borrowed reference to text attribute */
507
508 PyObject* res = self->tail;
509
510 if (JOIN_GET(res)) {
511 res = JOIN_OBJ(res);
512 if (PyList_CheckExact(res)) {
513 res = list_join(res);
514 if (!res)
515 return NULL;
516 self->tail = res;
517 }
518 }
519
520 return res;
521}
522
523static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000524subelement(PyObject* self, PyObject* args, PyObject* kw)
525{
526 PyObject* elem;
527
528 ElementObject* parent;
529 PyObject* tag;
530 PyObject* attrib = NULL;
531 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
532 &Element_Type, &parent, &tag,
533 &PyDict_Type, &attrib))
534 return NULL;
535
536 if (attrib || kw) {
537 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
538 if (!attrib)
539 return NULL;
540 if (kw)
541 PyDict_Update(attrib, kw);
542 } else {
543 Py_INCREF(Py_None);
544 attrib = Py_None;
545 }
546
Eli Bendersky092af1f2012-03-04 07:14:03 +0200547 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000548
549 Py_DECREF(attrib);
550
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000551 if (element_add_subelement(parent, elem) < 0) {
552 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000553 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000554 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000555
556 return elem;
557}
558
559static void
560element_dealloc(ElementObject* self)
561{
562 if (self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200563 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000564
565 /* discard attributes */
566 Py_DECREF(self->tag);
567 Py_DECREF(JOIN_OBJ(self->text));
568 Py_DECREF(JOIN_OBJ(self->tail));
569
570 RELEASE(sizeof(ElementObject), "destroy element");
571
Eli Bendersky092af1f2012-03-04 07:14:03 +0200572 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573}
574
575/* -------------------------------------------------------------------- */
576/* methods (in alphabetical order) */
577
578static PyObject*
579element_append(ElementObject* self, PyObject* args)
580{
581 PyObject* element;
582 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
583 return NULL;
584
585 if (element_add_subelement(self, element) < 0)
586 return NULL;
587
588 Py_RETURN_NONE;
589}
590
591static PyObject*
592element_clear(ElementObject* self, PyObject* args)
593{
594 if (!PyArg_ParseTuple(args, ":clear"))
595 return NULL;
596
597 if (self->extra) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200598 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599 self->extra = NULL;
600 }
601
602 Py_INCREF(Py_None);
603 Py_DECREF(JOIN_OBJ(self->text));
604 self->text = Py_None;
605
606 Py_INCREF(Py_None);
607 Py_DECREF(JOIN_OBJ(self->tail));
608 self->tail = Py_None;
609
610 Py_RETURN_NONE;
611}
612
613static PyObject*
614element_copy(ElementObject* self, PyObject* args)
615{
616 int i;
617 ElementObject* element;
618
619 if (!PyArg_ParseTuple(args, ":__copy__"))
620 return NULL;
621
Eli Bendersky092af1f2012-03-04 07:14:03 +0200622 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000623 self->tag, (self->extra) ? self->extra->attrib : Py_None
624 );
625 if (!element)
626 return NULL;
627
628 Py_DECREF(JOIN_OBJ(element->text));
629 element->text = self->text;
630 Py_INCREF(JOIN_OBJ(element->text));
631
632 Py_DECREF(JOIN_OBJ(element->tail));
633 element->tail = self->tail;
634 Py_INCREF(JOIN_OBJ(element->tail));
635
636 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100637
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000638 if (element_resize(element, self->extra->length) < 0) {
639 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000640 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000641 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000642
643 for (i = 0; i < self->extra->length; i++) {
644 Py_INCREF(self->extra->children[i]);
645 element->extra->children[i] = self->extra->children[i];
646 }
647
648 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100649
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000650 }
651
652 return (PyObject*) element;
653}
654
655static PyObject*
656element_deepcopy(ElementObject* self, PyObject* args)
657{
658 int i;
659 ElementObject* element;
660 PyObject* tag;
661 PyObject* attrib;
662 PyObject* text;
663 PyObject* tail;
664 PyObject* id;
665
666 PyObject* memo;
667 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
668 return NULL;
669
670 tag = deepcopy(self->tag, memo);
671 if (!tag)
672 return NULL;
673
674 if (self->extra) {
675 attrib = deepcopy(self->extra->attrib, memo);
676 if (!attrib) {
677 Py_DECREF(tag);
678 return NULL;
679 }
680 } else {
681 Py_INCREF(Py_None);
682 attrib = Py_None;
683 }
684
Eli Bendersky092af1f2012-03-04 07:14:03 +0200685 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000686
687 Py_DECREF(tag);
688 Py_DECREF(attrib);
689
690 if (!element)
691 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100692
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693 text = deepcopy(JOIN_OBJ(self->text), memo);
694 if (!text)
695 goto error;
696 Py_DECREF(element->text);
697 element->text = JOIN_SET(text, JOIN_GET(self->text));
698
699 tail = deepcopy(JOIN_OBJ(self->tail), memo);
700 if (!tail)
701 goto error;
702 Py_DECREF(element->tail);
703 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
704
705 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100706
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000707 if (element_resize(element, self->extra->length) < 0)
708 goto error;
709
710 for (i = 0; i < self->extra->length; i++) {
711 PyObject* child = deepcopy(self->extra->children[i], memo);
712 if (!child) {
713 element->extra->length = i;
714 goto error;
715 }
716 element->extra->children[i] = child;
717 }
718
719 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100720
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000721 }
722
723 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000724 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000725 if (!id)
726 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000727
728 i = PyDict_SetItem(memo, id, (PyObject*) element);
729
730 Py_DECREF(id);
731
732 if (i < 0)
733 goto error;
734
735 return (PyObject*) element;
736
737 error:
738 Py_DECREF(element);
739 return NULL;
740}
741
742LOCAL(int)
743checkpath(PyObject* tag)
744{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000745 Py_ssize_t i;
746 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000747
748 /* check if a tag contains an xpath character */
749
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000750#define PATHCHAR(ch) \
751 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200754 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
755 void *data = PyUnicode_DATA(tag);
756 unsigned int kind = PyUnicode_KIND(tag);
757 for (i = 0; i < len; i++) {
758 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
759 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000760 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200761 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200763 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 return 1;
765 }
766 return 0;
767 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000768 if (PyBytes_Check(tag)) {
769 char *p = PyBytes_AS_STRING(tag);
770 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 if (p[i] == '{')
772 check = 0;
773 else if (p[i] == '}')
774 check = 1;
775 else if (check && PATHCHAR(p[i]))
776 return 1;
777 }
778 return 0;
779 }
780
781 return 1; /* unknown type; might be path expression */
782}
783
784static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000785element_extend(ElementObject* self, PyObject* args)
786{
787 PyObject* seq;
788 Py_ssize_t i, seqlen = 0;
789
790 PyObject* seq_in;
791 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
792 return NULL;
793
794 seq = PySequence_Fast(seq_in, "");
795 if (!seq) {
796 PyErr_Format(
797 PyExc_TypeError,
798 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
799 );
800 return NULL;
801 }
802
803 seqlen = PySequence_Size(seq);
804 for (i = 0; i < seqlen; i++) {
805 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
806 if (element_add_subelement(self, element) < 0) {
807 Py_DECREF(seq);
808 return NULL;
809 }
810 }
811
812 Py_DECREF(seq);
813
814 Py_RETURN_NONE;
815}
816
817static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000818element_find(ElementObject* self, PyObject* args)
819{
820 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000821 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000822 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200823
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000824 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000825 return NULL;
826
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200827 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200828 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200829 return _PyObject_CallMethodId(
830 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000831 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200832 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000833
834 if (!self->extra)
835 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100836
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000837 for (i = 0; i < self->extra->length; i++) {
838 PyObject* item = self->extra->children[i];
839 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000840 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000841 Py_INCREF(item);
842 return item;
843 }
844 }
845
846 Py_RETURN_NONE;
847}
848
849static PyObject*
850element_findtext(ElementObject* self, PyObject* args)
851{
852 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000853 PyObject* tag;
854 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000855 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200856 _Py_IDENTIFIER(findtext);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200857
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000858 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000859 return NULL;
860
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000861 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200862 return _PyObject_CallMethodId(
863 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000864 );
865
866 if (!self->extra) {
867 Py_INCREF(default_value);
868 return default_value;
869 }
870
871 for (i = 0; i < self->extra->length; i++) {
872 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000873 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
874
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000875 PyObject* text = element_get_text(item);
876 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000877 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000878 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000879 return text;
880 }
881 }
882
883 Py_INCREF(default_value);
884 return default_value;
885}
886
887static PyObject*
888element_findall(ElementObject* self, PyObject* args)
889{
890 int i;
891 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000892 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000893 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200894
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000895 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000896 return NULL;
897
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200898 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200899 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200900 return _PyObject_CallMethodId(
901 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000902 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200903 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000904
905 out = PyList_New(0);
906 if (!out)
907 return NULL;
908
909 if (!self->extra)
910 return out;
911
912 for (i = 0; i < self->extra->length; i++) {
913 PyObject* item = self->extra->children[i];
914 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000915 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000916 if (PyList_Append(out, item) < 0) {
917 Py_DECREF(out);
918 return NULL;
919 }
920 }
921 }
922
923 return out;
924}
925
926static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000927element_iterfind(ElementObject* self, PyObject* args)
928{
929 PyObject* tag;
930 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200931 _Py_IDENTIFIER(iterfind);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200932
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000933 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
934 return NULL;
935
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200936 return _PyObject_CallMethodId(
937 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000938 );
939}
940
941static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000942element_get(ElementObject* self, PyObject* args)
943{
944 PyObject* value;
945
946 PyObject* key;
947 PyObject* default_value = Py_None;
948 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
949 return NULL;
950
951 if (!self->extra || self->extra->attrib == Py_None)
952 value = default_value;
953 else {
954 value = PyDict_GetItem(self->extra->attrib, key);
955 if (!value)
956 value = default_value;
957 }
958
959 Py_INCREF(value);
960 return value;
961}
962
963static PyObject*
964element_getchildren(ElementObject* self, PyObject* args)
965{
966 int i;
967 PyObject* list;
968
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000969 /* FIXME: report as deprecated? */
970
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000971 if (!PyArg_ParseTuple(args, ":getchildren"))
972 return NULL;
973
974 if (!self->extra)
975 return PyList_New(0);
976
977 list = PyList_New(self->extra->length);
978 if (!list)
979 return NULL;
980
981 for (i = 0; i < self->extra->length; i++) {
982 PyObject* item = self->extra->children[i];
983 Py_INCREF(item);
984 PyList_SET_ITEM(list, i, item);
985 }
986
987 return list;
988}
989
990static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000991element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000992{
993 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100994
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000995 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000996 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000997 return NULL;
998
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000999 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001000 PyErr_SetString(
1001 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001002 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001003 );
1004 return NULL;
1005 }
1006
1007 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001008 if (!args)
1009 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001010
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001011 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1012 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1013
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001014 result = PyObject_CallObject(elementtree_iter_obj, args);
1015
1016 Py_DECREF(args);
1017
1018 return result;
1019}
1020
1021
1022static PyObject*
1023element_itertext(ElementObject* self, PyObject* args)
1024{
1025 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001026
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001027 if (!PyArg_ParseTuple(args, ":itertext"))
1028 return NULL;
1029
1030 if (!elementtree_itertext_obj) {
1031 PyErr_SetString(
1032 PyExc_RuntimeError,
1033 "itertext helper not found"
1034 );
1035 return NULL;
1036 }
1037
1038 args = PyTuple_New(1);
1039 if (!args)
1040 return NULL;
1041
1042 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1043
1044 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001045
1046 Py_DECREF(args);
1047
1048 return result;
1049}
1050
1051static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001052element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001053{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001054 ElementObject* self = (ElementObject*) self_;
1055
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001056 if (!self->extra || index < 0 || index >= self->extra->length) {
1057 PyErr_SetString(
1058 PyExc_IndexError,
1059 "child index out of range"
1060 );
1061 return NULL;
1062 }
1063
1064 Py_INCREF(self->extra->children[index]);
1065 return self->extra->children[index];
1066}
1067
1068static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001069element_insert(ElementObject* self, PyObject* args)
1070{
1071 int i;
1072
1073 int index;
1074 PyObject* element;
1075 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1076 &Element_Type, &element))
1077 return NULL;
1078
1079 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001080 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001082 if (index < 0) {
1083 index += self->extra->length;
1084 if (index < 0)
1085 index = 0;
1086 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001087 if (index > self->extra->length)
1088 index = self->extra->length;
1089
1090 if (element_resize(self, 1) < 0)
1091 return NULL;
1092
1093 for (i = self->extra->length; i > index; i--)
1094 self->extra->children[i] = self->extra->children[i-1];
1095
1096 Py_INCREF(element);
1097 self->extra->children[index] = element;
1098
1099 self->extra->length++;
1100
1101 Py_RETURN_NONE;
1102}
1103
1104static PyObject*
1105element_items(ElementObject* self, PyObject* args)
1106{
1107 if (!PyArg_ParseTuple(args, ":items"))
1108 return NULL;
1109
1110 if (!self->extra || self->extra->attrib == Py_None)
1111 return PyList_New(0);
1112
1113 return PyDict_Items(self->extra->attrib);
1114}
1115
1116static PyObject*
1117element_keys(ElementObject* self, PyObject* args)
1118{
1119 if (!PyArg_ParseTuple(args, ":keys"))
1120 return NULL;
1121
1122 if (!self->extra || self->extra->attrib == Py_None)
1123 return PyList_New(0);
1124
1125 return PyDict_Keys(self->extra->attrib);
1126}
1127
Martin v. Löwis18e16552006-02-15 17:27:45 +00001128static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001129element_length(ElementObject* self)
1130{
1131 if (!self->extra)
1132 return 0;
1133
1134 return self->extra->length;
1135}
1136
1137static PyObject*
1138element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1139{
1140 PyObject* elem;
1141
1142 PyObject* tag;
1143 PyObject* attrib;
1144 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1145 return NULL;
1146
1147 attrib = PyDict_Copy(attrib);
1148 if (!attrib)
1149 return NULL;
1150
Eli Bendersky092af1f2012-03-04 07:14:03 +02001151 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001152
1153 Py_DECREF(attrib);
1154
1155 return elem;
1156}
1157
1158static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001159element_remove(ElementObject* self, PyObject* args)
1160{
1161 int i;
1162
1163 PyObject* element;
1164 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1165 return NULL;
1166
1167 if (!self->extra) {
1168 /* element has no children, so raise exception */
1169 PyErr_SetString(
1170 PyExc_ValueError,
1171 "list.remove(x): x not in list"
1172 );
1173 return NULL;
1174 }
1175
1176 for (i = 0; i < self->extra->length; i++) {
1177 if (self->extra->children[i] == element)
1178 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001179 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180 break;
1181 }
1182
1183 if (i == self->extra->length) {
1184 /* element is not in children, so raise exception */
1185 PyErr_SetString(
1186 PyExc_ValueError,
1187 "list.remove(x): x not in list"
1188 );
1189 return NULL;
1190 }
1191
1192 Py_DECREF(self->extra->children[i]);
1193
1194 self->extra->length--;
1195
1196 for (; i < self->extra->length; i++)
1197 self->extra->children[i] = self->extra->children[i+1];
1198
1199 Py_RETURN_NONE;
1200}
1201
1202static PyObject*
1203element_repr(ElementObject* self)
1204{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001205 if (self->tag)
1206 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1207 else
1208 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001209}
1210
1211static PyObject*
1212element_set(ElementObject* self, PyObject* args)
1213{
1214 PyObject* attrib;
1215
1216 PyObject* key;
1217 PyObject* value;
1218 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1219 return NULL;
1220
1221 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001222 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001223
1224 attrib = element_get_attrib(self);
1225 if (!attrib)
1226 return NULL;
1227
1228 if (PyDict_SetItem(attrib, key, value) < 0)
1229 return NULL;
1230
1231 Py_RETURN_NONE;
1232}
1233
1234static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001235element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001236{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001237 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001238 int i;
1239 PyObject* old;
1240
1241 if (!self->extra || index < 0 || index >= self->extra->length) {
1242 PyErr_SetString(
1243 PyExc_IndexError,
1244 "child assignment index out of range");
1245 return -1;
1246 }
1247
1248 old = self->extra->children[index];
1249
1250 if (item) {
1251 Py_INCREF(item);
1252 self->extra->children[index] = item;
1253 } else {
1254 self->extra->length--;
1255 for (i = index; i < self->extra->length; i++)
1256 self->extra->children[i] = self->extra->children[i+1];
1257 }
1258
1259 Py_DECREF(old);
1260
1261 return 0;
1262}
1263
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001264static PyObject*
1265element_subscr(PyObject* self_, PyObject* item)
1266{
1267 ElementObject* self = (ElementObject*) self_;
1268
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001269 if (PyIndex_Check(item)) {
1270 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001271
1272 if (i == -1 && PyErr_Occurred()) {
1273 return NULL;
1274 }
1275 if (i < 0 && self->extra)
1276 i += self->extra->length;
1277 return element_getitem(self_, i);
1278 }
1279 else if (PySlice_Check(item)) {
1280 Py_ssize_t start, stop, step, slicelen, cur, i;
1281 PyObject* list;
1282
1283 if (!self->extra)
1284 return PyList_New(0);
1285
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001286 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001287 self->extra->length,
1288 &start, &stop, &step, &slicelen) < 0) {
1289 return NULL;
1290 }
1291
1292 if (slicelen <= 0)
1293 return PyList_New(0);
1294 else {
1295 list = PyList_New(slicelen);
1296 if (!list)
1297 return NULL;
1298
1299 for (cur = start, i = 0; i < slicelen;
1300 cur += step, i++) {
1301 PyObject* item = self->extra->children[cur];
1302 Py_INCREF(item);
1303 PyList_SET_ITEM(list, i, item);
1304 }
1305
1306 return list;
1307 }
1308 }
1309 else {
1310 PyErr_SetString(PyExc_TypeError,
1311 "element indices must be integers");
1312 return NULL;
1313 }
1314}
1315
1316static int
1317element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1318{
1319 ElementObject* self = (ElementObject*) self_;
1320
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001321 if (PyIndex_Check(item)) {
1322 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001323
1324 if (i == -1 && PyErr_Occurred()) {
1325 return -1;
1326 }
1327 if (i < 0 && self->extra)
1328 i += self->extra->length;
1329 return element_setitem(self_, i, value);
1330 }
1331 else if (PySlice_Check(item)) {
1332 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1333
1334 PyObject* recycle = NULL;
1335 PyObject* seq = NULL;
1336
1337 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001338 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001339
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001340 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001341 self->extra->length,
1342 &start, &stop, &step, &slicelen) < 0) {
1343 return -1;
1344 }
1345
Eli Bendersky865756a2012-03-09 13:38:15 +02001346 if (value == NULL) {
1347 /* Delete slice */
1348 size_t cur;
1349 Py_ssize_t i;
1350
1351 if (slicelen <= 0)
1352 return 0;
1353
1354 /* Since we're deleting, the direction of the range doesn't matter,
1355 * so for simplicity make it always ascending.
1356 */
1357 if (step < 0) {
1358 stop = start + 1;
1359 start = stop + step * (slicelen - 1) - 1;
1360 step = -step;
1361 }
1362
1363 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1364
1365 /* recycle is a list that will contain all the children
1366 * scheduled for removal.
1367 */
1368 if (!(recycle = PyList_New(slicelen))) {
1369 PyErr_NoMemory();
1370 return -1;
1371 }
1372
1373 /* This loop walks over all the children that have to be deleted,
1374 * with cur pointing at them. num_moved is the amount of children
1375 * until the next deleted child that have to be "shifted down" to
1376 * occupy the deleted's places.
1377 * Note that in the ith iteration, shifting is done i+i places down
1378 * because i children were already removed.
1379 */
1380 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1381 /* Compute how many children have to be moved, clipping at the
1382 * list end.
1383 */
1384 Py_ssize_t num_moved = step - 1;
1385 if (cur + step >= (size_t)self->extra->length) {
1386 num_moved = self->extra->length - cur - 1;
1387 }
1388
1389 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1390
1391 memmove(
1392 self->extra->children + cur - i,
1393 self->extra->children + cur + 1,
1394 num_moved * sizeof(PyObject *));
1395 }
1396
1397 /* Leftover "tail" after the last removed child */
1398 cur = start + (size_t)slicelen * step;
1399 if (cur < (size_t)self->extra->length) {
1400 memmove(
1401 self->extra->children + cur - slicelen,
1402 self->extra->children + cur,
1403 (self->extra->length - cur) * sizeof(PyObject *));
1404 }
1405
1406 self->extra->length -= slicelen;
1407
1408 /* Discard the recycle list with all the deleted sub-elements */
1409 Py_XDECREF(recycle);
1410 return 0;
1411 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001412 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001413 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001414 seq = PySequence_Fast(value, "");
1415 if (!seq) {
1416 PyErr_Format(
1417 PyExc_TypeError,
1418 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1419 );
1420 return -1;
1421 }
1422 newlen = PySequence_Size(seq);
1423 }
1424
1425 if (step != 1 && newlen != slicelen)
1426 {
1427 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001428 "attempt to assign sequence of size %zd "
1429 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001430 newlen, slicelen
1431 );
1432 return -1;
1433 }
1434
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001435 /* Resize before creating the recycle bin, to prevent refleaks. */
1436 if (newlen > slicelen) {
1437 if (element_resize(self, newlen - slicelen) < 0) {
1438 if (seq) {
1439 Py_DECREF(seq);
1440 }
1441 return -1;
1442 }
1443 }
1444
1445 if (slicelen > 0) {
1446 /* to avoid recursive calls to this method (via decref), move
1447 old items to the recycle bin here, and get rid of them when
1448 we're done modifying the element */
1449 recycle = PyList_New(slicelen);
1450 if (!recycle) {
1451 if (seq) {
1452 Py_DECREF(seq);
1453 }
1454 return -1;
1455 }
1456 for (cur = start, i = 0; i < slicelen;
1457 cur += step, i++)
1458 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1459 }
1460
1461 if (newlen < slicelen) {
1462 /* delete slice */
1463 for (i = stop; i < self->extra->length; i++)
1464 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1465 } else if (newlen > slicelen) {
1466 /* insert slice */
1467 for (i = self->extra->length-1; i >= stop; i--)
1468 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1469 }
1470
1471 /* replace the slice */
1472 for (cur = start, i = 0; i < newlen;
1473 cur += step, i++) {
1474 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1475 Py_INCREF(element);
1476 self->extra->children[cur] = element;
1477 }
1478
1479 self->extra->length += newlen - slicelen;
1480
1481 if (seq) {
1482 Py_DECREF(seq);
1483 }
1484
1485 /* discard the recycle bin, and everything in it */
1486 Py_XDECREF(recycle);
1487
1488 return 0;
1489 }
1490 else {
1491 PyErr_SetString(PyExc_TypeError,
1492 "element indices must be integers");
1493 return -1;
1494 }
1495}
1496
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001497static PyMethodDef element_methods[] = {
1498
1499 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1500
1501 {"get", (PyCFunction) element_get, METH_VARARGS},
1502 {"set", (PyCFunction) element_set, METH_VARARGS},
1503
1504 {"find", (PyCFunction) element_find, METH_VARARGS},
1505 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1506 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1507
1508 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001509 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001510 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1511 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1512
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001513 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1514 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1515 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1516
1517 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001518 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1519
1520 {"items", (PyCFunction) element_items, METH_VARARGS},
1521 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1522
1523 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1524
1525 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1526 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1527
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001528 {NULL, NULL}
1529};
1530
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001531static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001532element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001533{
1534 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001535 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001536
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001537 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001538 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001539
Alexander Belopolskye239d232010-12-08 23:31:48 +00001540 if (name == NULL)
1541 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001542
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001543 /* handle common attributes first */
1544 if (strcmp(name, "tag") == 0) {
1545 res = self->tag;
1546 Py_INCREF(res);
1547 return res;
1548 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001549 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001550 Py_INCREF(res);
1551 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001552 }
1553
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001554 /* methods */
1555 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1556 if (res)
1557 return res;
1558
1559 /* less common attributes */
1560 if (strcmp(name, "tail") == 0) {
1561 PyErr_Clear();
1562 res = element_get_tail(self);
1563 } else if (strcmp(name, "attrib") == 0) {
1564 PyErr_Clear();
1565 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001566 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001567 res = element_get_attrib(self);
1568 }
1569
1570 if (!res)
1571 return NULL;
1572
1573 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001574 return res;
1575}
1576
1577static int
1578element_setattr(ElementObject* self, const char* name, PyObject* value)
1579{
1580 if (value == NULL) {
1581 PyErr_SetString(
1582 PyExc_AttributeError,
1583 "can't delete element attributes"
1584 );
1585 return -1;
1586 }
1587
1588 if (strcmp(name, "tag") == 0) {
1589 Py_DECREF(self->tag);
1590 self->tag = value;
1591 Py_INCREF(self->tag);
1592 } else if (strcmp(name, "text") == 0) {
1593 Py_DECREF(JOIN_OBJ(self->text));
1594 self->text = value;
1595 Py_INCREF(self->text);
1596 } else if (strcmp(name, "tail") == 0) {
1597 Py_DECREF(JOIN_OBJ(self->tail));
1598 self->tail = value;
1599 Py_INCREF(self->tail);
1600 } else if (strcmp(name, "attrib") == 0) {
1601 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001602 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001603 Py_DECREF(self->extra->attrib);
1604 self->extra->attrib = value;
1605 Py_INCREF(self->extra->attrib);
1606 } else {
1607 PyErr_SetString(PyExc_AttributeError, name);
1608 return -1;
1609 }
1610
1611 return 0;
1612}
1613
1614static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001615 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001616 0, /* sq_concat */
1617 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001618 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001619 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001620 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001621 0,
1622};
1623
1624static PyMappingMethods element_as_mapping = {
1625 (lenfunc) element_length,
1626 (binaryfunc) element_subscr,
1627 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001628};
1629
Neal Norwitz227b5332006-03-22 09:28:35 +00001630static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001631 PyVarObject_HEAD_INIT(NULL, 0)
1632 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001633 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001634 (destructor)element_dealloc, /* tp_dealloc */
1635 0, /* tp_print */
1636 0, /* tp_getattr */
1637 (setattrfunc)element_setattr, /* tp_setattr */
1638 0, /* tp_reserved */
1639 (reprfunc)element_repr, /* tp_repr */
1640 0, /* tp_as_number */
1641 &element_as_sequence, /* tp_as_sequence */
1642 &element_as_mapping, /* tp_as_mapping */
1643 0, /* tp_hash */
1644 0, /* tp_call */
1645 0, /* tp_str */
1646 (getattrofunc)element_getattro, /* tp_getattro */
1647 0, /* tp_setattro */
1648 0, /* tp_as_buffer */
1649 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1650 0, /* tp_doc */
1651 0, /* tp_traverse */
1652 0, /* tp_clear */
1653 0, /* tp_richcompare */
1654 0, /* tp_weaklistoffset */
1655 0, /* tp_iter */
1656 0, /* tp_iternext */
1657 element_methods, /* tp_methods */
1658 0, /* tp_members */
1659 0, /* tp_getset */
1660 0, /* tp_base */
1661 0, /* tp_dict */
1662 0, /* tp_descr_get */
1663 0, /* tp_descr_set */
1664 0, /* tp_dictoffset */
1665 (initproc)element_init, /* tp_init */
1666 PyType_GenericAlloc, /* tp_alloc */
1667 element_new, /* tp_new */
1668 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001669};
1670
1671/* ==================================================================== */
1672/* the tree builder type */
1673
1674typedef struct {
1675 PyObject_HEAD
1676
1677 PyObject* root; /* root node (first created node) */
1678
1679 ElementObject* this; /* current node */
1680 ElementObject* last; /* most recently created node */
1681
1682 PyObject* data; /* data collector (string or list), or NULL */
1683
1684 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001685 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001686
1687 /* element tracing */
1688 PyObject* events; /* list of events, or NULL if not collecting */
1689 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1690 PyObject* end_event_obj;
1691 PyObject* start_ns_event_obj;
1692 PyObject* end_ns_event_obj;
1693
1694} TreeBuilderObject;
1695
Neal Norwitz227b5332006-03-22 09:28:35 +00001696static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001697
Christian Heimes90aa7642007-12-19 02:45:37 +00001698#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001699
1700/* -------------------------------------------------------------------- */
1701/* constructor and destructor */
1702
1703LOCAL(PyObject*)
1704treebuilder_new(void)
1705{
1706 TreeBuilderObject* self;
1707
1708 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1709 if (self == NULL)
1710 return NULL;
1711
1712 self->root = NULL;
1713
1714 Py_INCREF(Py_None);
1715 self->this = (ElementObject*) Py_None;
1716
1717 Py_INCREF(Py_None);
1718 self->last = (ElementObject*) Py_None;
1719
1720 self->data = NULL;
1721
1722 self->stack = PyList_New(20);
1723 self->index = 0;
1724
1725 self->events = NULL;
1726 self->start_event_obj = self->end_event_obj = NULL;
1727 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1728
1729 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1730
1731 return (PyObject*) self;
1732}
1733
1734static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001735treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001736{
1737 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1738 return NULL;
1739
1740 return treebuilder_new();
1741}
1742
1743static void
1744treebuilder_dealloc(TreeBuilderObject* self)
1745{
1746 Py_XDECREF(self->end_ns_event_obj);
1747 Py_XDECREF(self->start_ns_event_obj);
1748 Py_XDECREF(self->end_event_obj);
1749 Py_XDECREF(self->start_event_obj);
1750 Py_XDECREF(self->events);
1751 Py_DECREF(self->stack);
1752 Py_XDECREF(self->data);
1753 Py_DECREF(self->last);
1754 Py_DECREF(self->this);
1755 Py_XDECREF(self->root);
1756
1757 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1758
1759 PyObject_Del(self);
1760}
1761
1762/* -------------------------------------------------------------------- */
1763/* handlers */
1764
1765LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001766treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1767 PyObject* attrib)
1768{
1769 PyObject* node;
1770 PyObject* this;
1771
1772 if (self->data) {
1773 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001774 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001775 self->last->text = JOIN_SET(
1776 self->data, PyList_CheckExact(self->data)
1777 );
1778 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001779 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001780 self->last->tail = JOIN_SET(
1781 self->data, PyList_CheckExact(self->data)
1782 );
1783 }
1784 self->data = NULL;
1785 }
1786
Eli Bendersky092af1f2012-03-04 07:14:03 +02001787 node = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001788 if (!node)
1789 return NULL;
1790
1791 this = (PyObject*) self->this;
1792
1793 if (this != Py_None) {
1794 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001795 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001796 } else {
1797 if (self->root) {
1798 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001799 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001800 "multiple elements on top level"
1801 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001802 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001803 }
1804 Py_INCREF(node);
1805 self->root = node;
1806 }
1807
1808 if (self->index < PyList_GET_SIZE(self->stack)) {
1809 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001810 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001811 Py_INCREF(this);
1812 } else {
1813 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001814 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001815 }
1816 self->index++;
1817
1818 Py_DECREF(this);
1819 Py_INCREF(node);
1820 self->this = (ElementObject*) node;
1821
1822 Py_DECREF(self->last);
1823 Py_INCREF(node);
1824 self->last = (ElementObject*) node;
1825
1826 if (self->start_event_obj) {
1827 PyObject* res;
1828 PyObject* action = self->start_event_obj;
1829 res = PyTuple_New(2);
1830 if (res) {
1831 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1832 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1833 PyList_Append(self->events, res);
1834 Py_DECREF(res);
1835 } else
1836 PyErr_Clear(); /* FIXME: propagate error */
1837 }
1838
1839 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001840
1841 error:
1842 Py_DECREF(node);
1843 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001844}
1845
1846LOCAL(PyObject*)
1847treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1848{
1849 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001850 if (self->last == (ElementObject*) Py_None) {
1851 /* ignore calls to data before the first call to start */
1852 Py_RETURN_NONE;
1853 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001854 /* store the first item as is */
1855 Py_INCREF(data); self->data = data;
1856 } else {
1857 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001858 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1859 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001860 /* expat often generates single character data sections; handle
1861 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001862 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1863 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001864 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001865 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001866 } else if (PyList_CheckExact(self->data)) {
1867 if (PyList_Append(self->data, data) < 0)
1868 return NULL;
1869 } else {
1870 PyObject* list = PyList_New(2);
1871 if (!list)
1872 return NULL;
1873 PyList_SET_ITEM(list, 0, self->data);
1874 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1875 self->data = list;
1876 }
1877 }
1878
1879 Py_RETURN_NONE;
1880}
1881
1882LOCAL(PyObject*)
1883treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1884{
1885 PyObject* item;
1886
1887 if (self->data) {
1888 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001889 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001890 self->last->text = JOIN_SET(
1891 self->data, PyList_CheckExact(self->data)
1892 );
1893 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001894 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001895 self->last->tail = JOIN_SET(
1896 self->data, PyList_CheckExact(self->data)
1897 );
1898 }
1899 self->data = NULL;
1900 }
1901
1902 if (self->index == 0) {
1903 PyErr_SetString(
1904 PyExc_IndexError,
1905 "pop from empty stack"
1906 );
1907 return NULL;
1908 }
1909
1910 self->index--;
1911
1912 item = PyList_GET_ITEM(self->stack, self->index);
1913 Py_INCREF(item);
1914
1915 Py_DECREF(self->last);
1916
1917 self->last = (ElementObject*) self->this;
1918 self->this = (ElementObject*) item;
1919
1920 if (self->end_event_obj) {
1921 PyObject* res;
1922 PyObject* action = self->end_event_obj;
1923 PyObject* node = (PyObject*) self->last;
1924 res = PyTuple_New(2);
1925 if (res) {
1926 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1927 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1928 PyList_Append(self->events, res);
1929 Py_DECREF(res);
1930 } else
1931 PyErr_Clear(); /* FIXME: propagate error */
1932 }
1933
1934 Py_INCREF(self->last);
1935 return (PyObject*) self->last;
1936}
1937
1938LOCAL(void)
1939treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001940 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001941{
1942 PyObject* res;
1943 PyObject* action;
1944 PyObject* parcel;
1945
1946 if (!self->events)
1947 return;
1948
1949 if (start) {
1950 if (!self->start_ns_event_obj)
1951 return;
1952 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001953 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001954 if (!parcel)
1955 return;
1956 Py_INCREF(action);
1957 } else {
1958 if (!self->end_ns_event_obj)
1959 return;
1960 action = self->end_ns_event_obj;
1961 Py_INCREF(action);
1962 parcel = Py_None;
1963 Py_INCREF(parcel);
1964 }
1965
1966 res = PyTuple_New(2);
1967
1968 if (res) {
1969 PyTuple_SET_ITEM(res, 0, action);
1970 PyTuple_SET_ITEM(res, 1, parcel);
1971 PyList_Append(self->events, res);
1972 Py_DECREF(res);
1973 } else
1974 PyErr_Clear(); /* FIXME: propagate error */
1975}
1976
1977/* -------------------------------------------------------------------- */
1978/* methods (in alphabetical order) */
1979
1980static PyObject*
1981treebuilder_data(TreeBuilderObject* self, PyObject* args)
1982{
1983 PyObject* data;
1984 if (!PyArg_ParseTuple(args, "O:data", &data))
1985 return NULL;
1986
1987 return treebuilder_handle_data(self, data);
1988}
1989
1990static PyObject*
1991treebuilder_end(TreeBuilderObject* self, PyObject* args)
1992{
1993 PyObject* tag;
1994 if (!PyArg_ParseTuple(args, "O:end", &tag))
1995 return NULL;
1996
1997 return treebuilder_handle_end(self, tag);
1998}
1999
2000LOCAL(PyObject*)
2001treebuilder_done(TreeBuilderObject* self)
2002{
2003 PyObject* res;
2004
2005 /* FIXME: check stack size? */
2006
2007 if (self->root)
2008 res = self->root;
2009 else
2010 res = Py_None;
2011
2012 Py_INCREF(res);
2013 return res;
2014}
2015
2016static PyObject*
2017treebuilder_close(TreeBuilderObject* self, PyObject* args)
2018{
2019 if (!PyArg_ParseTuple(args, ":close"))
2020 return NULL;
2021
2022 return treebuilder_done(self);
2023}
2024
2025static PyObject*
2026treebuilder_start(TreeBuilderObject* self, PyObject* args)
2027{
2028 PyObject* tag;
2029 PyObject* attrib = Py_None;
2030 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2031 return NULL;
2032
2033 return treebuilder_handle_start(self, tag, attrib);
2034}
2035
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002036static PyMethodDef treebuilder_methods[] = {
2037 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2038 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2039 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002040 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2041 {NULL, NULL}
2042};
2043
Neal Norwitz227b5332006-03-22 09:28:35 +00002044static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002045 PyVarObject_HEAD_INIT(NULL, 0)
2046 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002047 /* methods */
2048 (destructor)treebuilder_dealloc, /* tp_dealloc */
2049 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002050 0, /* tp_getattr */
2051 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002052 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002053 0, /* tp_repr */
2054 0, /* tp_as_number */
2055 0, /* tp_as_sequence */
2056 0, /* tp_as_mapping */
2057 0, /* tp_hash */
2058 0, /* tp_call */
2059 0, /* tp_str */
2060 0, /* tp_getattro */
2061 0, /* tp_setattro */
2062 0, /* tp_as_buffer */
2063 Py_TPFLAGS_DEFAULT, /* tp_flags */
2064 0, /* tp_doc */
2065 0, /* tp_traverse */
2066 0, /* tp_clear */
2067 0, /* tp_richcompare */
2068 0, /* tp_weaklistoffset */
2069 0, /* tp_iter */
2070 0, /* tp_iternext */
2071 treebuilder_methods, /* tp_methods */
2072 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002073};
2074
2075/* ==================================================================== */
2076/* the expat interface */
2077
2078#if defined(USE_EXPAT)
2079
2080#include "expat.h"
2081
2082#if defined(USE_PYEXPAT_CAPI)
2083#include "pyexpat.h"
2084static struct PyExpat_CAPI* expat_capi;
2085#define EXPAT(func) (expat_capi->func)
2086#else
2087#define EXPAT(func) (XML_##func)
2088#endif
2089
2090typedef struct {
2091 PyObject_HEAD
2092
2093 XML_Parser parser;
2094
2095 PyObject* target;
2096 PyObject* entity;
2097
2098 PyObject* names;
2099
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002100 PyObject* handle_start;
2101 PyObject* handle_data;
2102 PyObject* handle_end;
2103
2104 PyObject* handle_comment;
2105 PyObject* handle_pi;
2106
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002107 PyObject* handle_close;
2108
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002109} XMLParserObject;
2110
Neal Norwitz227b5332006-03-22 09:28:35 +00002111static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002112
2113/* helpers */
2114
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002115LOCAL(PyObject*)
2116makeuniversal(XMLParserObject* self, const char* string)
2117{
2118 /* convert a UTF-8 tag/attribute name from the expat parser
2119 to a universal name string */
2120
2121 int size = strlen(string);
2122 PyObject* key;
2123 PyObject* value;
2124
2125 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002126 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002127 if (!key)
2128 return NULL;
2129
2130 value = PyDict_GetItem(self->names, key);
2131
2132 if (value) {
2133 Py_INCREF(value);
2134 } else {
2135 /* new name. convert to universal name, and decode as
2136 necessary */
2137
2138 PyObject* tag;
2139 char* p;
2140 int i;
2141
2142 /* look for namespace separator */
2143 for (i = 0; i < size; i++)
2144 if (string[i] == '}')
2145 break;
2146 if (i != size) {
2147 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002148 tag = PyBytes_FromStringAndSize(NULL, size+1);
2149 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002150 p[0] = '{';
2151 memcpy(p+1, string, size);
2152 size++;
2153 } else {
2154 /* plain name; use key as tag */
2155 Py_INCREF(key);
2156 tag = key;
2157 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002158
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002159 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002160 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002161 value = PyUnicode_DecodeUTF8(p, size, "strict");
2162 Py_DECREF(tag);
2163 if (!value) {
2164 Py_DECREF(key);
2165 return NULL;
2166 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002167
2168 /* add to names dictionary */
2169 if (PyDict_SetItem(self->names, key, value) < 0) {
2170 Py_DECREF(key);
2171 Py_DECREF(value);
2172 return NULL;
2173 }
2174 }
2175
2176 Py_DECREF(key);
2177 return value;
2178}
2179
Eli Bendersky5b77d812012-03-16 08:20:05 +02002180/* Set the ParseError exception with the given parameters.
2181 * If message is not NULL, it's used as the error string. Otherwise, the
2182 * message string is the default for the given error_code.
2183*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002184static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002185expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002186{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002187 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002188
Victor Stinner499dfcf2011-03-21 13:26:24 +01002189 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002190 message ? message : EXPAT(ErrorString)(error_code),
2191 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002192 if (errmsg == NULL)
2193 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002194
Victor Stinner499dfcf2011-03-21 13:26:24 +01002195 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2196 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002197 if (!error)
2198 return;
2199
Eli Bendersky5b77d812012-03-16 08:20:05 +02002200 /* Add code and position attributes */
2201 code = PyLong_FromLong((long)error_code);
2202 if (!code) {
2203 Py_DECREF(error);
2204 return;
2205 }
2206 if (PyObject_SetAttrString(error, "code", code) == -1) {
2207 Py_DECREF(error);
2208 Py_DECREF(code);
2209 return;
2210 }
2211 Py_DECREF(code);
2212
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002213 position = Py_BuildValue("(ii)", line, column);
2214 if (!position) {
2215 Py_DECREF(error);
2216 return;
2217 }
2218 if (PyObject_SetAttrString(error, "position", position) == -1) {
2219 Py_DECREF(error);
2220 Py_DECREF(position);
2221 return;
2222 }
2223 Py_DECREF(position);
2224
2225 PyErr_SetObject(elementtree_parseerror_obj, error);
2226 Py_DECREF(error);
2227}
2228
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002229/* -------------------------------------------------------------------- */
2230/* handlers */
2231
2232static void
2233expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2234 int data_len)
2235{
2236 PyObject* key;
2237 PyObject* value;
2238 PyObject* res;
2239
2240 if (data_len < 2 || data_in[0] != '&')
2241 return;
2242
Neal Norwitz0269b912007-08-08 06:56:02 +00002243 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002244 if (!key)
2245 return;
2246
2247 value = PyDict_GetItem(self->entity, key);
2248
2249 if (value) {
2250 if (TreeBuilder_CheckExact(self->target))
2251 res = treebuilder_handle_data(
2252 (TreeBuilderObject*) self->target, value
2253 );
2254 else if (self->handle_data)
2255 res = PyObject_CallFunction(self->handle_data, "O", value);
2256 else
2257 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002258 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002259 } else if (!PyErr_Occurred()) {
2260 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002261 char message[128] = "undefined entity ";
2262 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002263 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002264 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002265 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002266 EXPAT(GetErrorColumnNumber)(self->parser),
2267 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002268 );
2269 }
2270
2271 Py_DECREF(key);
2272}
2273
2274static void
2275expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2276 const XML_Char **attrib_in)
2277{
2278 PyObject* res;
2279 PyObject* tag;
2280 PyObject* attrib;
2281 int ok;
2282
2283 /* tag name */
2284 tag = makeuniversal(self, tag_in);
2285 if (!tag)
2286 return; /* parser will look for errors */
2287
2288 /* attributes */
2289 if (attrib_in[0]) {
2290 attrib = PyDict_New();
2291 if (!attrib)
2292 return;
2293 while (attrib_in[0] && attrib_in[1]) {
2294 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002295 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002296 if (!key || !value) {
2297 Py_XDECREF(value);
2298 Py_XDECREF(key);
2299 Py_DECREF(attrib);
2300 return;
2301 }
2302 ok = PyDict_SetItem(attrib, key, value);
2303 Py_DECREF(value);
2304 Py_DECREF(key);
2305 if (ok < 0) {
2306 Py_DECREF(attrib);
2307 return;
2308 }
2309 attrib_in += 2;
2310 }
2311 } else {
2312 Py_INCREF(Py_None);
2313 attrib = Py_None;
2314 }
2315
2316 if (TreeBuilder_CheckExact(self->target))
2317 /* shortcut */
2318 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2319 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002320 else if (self->handle_start) {
2321 if (attrib == Py_None) {
2322 Py_DECREF(attrib);
2323 attrib = PyDict_New();
2324 if (!attrib)
2325 return;
2326 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002327 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002328 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002329 res = NULL;
2330
2331 Py_DECREF(tag);
2332 Py_DECREF(attrib);
2333
2334 Py_XDECREF(res);
2335}
2336
2337static void
2338expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2339 int data_len)
2340{
2341 PyObject* data;
2342 PyObject* res;
2343
Neal Norwitz0269b912007-08-08 06:56:02 +00002344 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002345 if (!data)
2346 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002347
2348 if (TreeBuilder_CheckExact(self->target))
2349 /* shortcut */
2350 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2351 else if (self->handle_data)
2352 res = PyObject_CallFunction(self->handle_data, "O", data);
2353 else
2354 res = NULL;
2355
2356 Py_DECREF(data);
2357
2358 Py_XDECREF(res);
2359}
2360
2361static void
2362expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2363{
2364 PyObject* tag;
2365 PyObject* res = NULL;
2366
2367 if (TreeBuilder_CheckExact(self->target))
2368 /* shortcut */
2369 /* the standard tree builder doesn't look at the end tag */
2370 res = treebuilder_handle_end(
2371 (TreeBuilderObject*) self->target, Py_None
2372 );
2373 else if (self->handle_end) {
2374 tag = makeuniversal(self, tag_in);
2375 if (tag) {
2376 res = PyObject_CallFunction(self->handle_end, "O", tag);
2377 Py_DECREF(tag);
2378 }
2379 }
2380
2381 Py_XDECREF(res);
2382}
2383
2384static void
2385expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2386 const XML_Char *uri)
2387{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002388 PyObject* sprefix = NULL;
2389 PyObject* suri = NULL;
2390
2391 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2392 if (!suri)
2393 return;
2394
2395 if (prefix)
2396 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2397 else
2398 sprefix = PyUnicode_FromString("");
2399 if (!sprefix) {
2400 Py_DECREF(suri);
2401 return;
2402 }
2403
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002404 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002405 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002406 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002407
2408 Py_DECREF(sprefix);
2409 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002410}
2411
2412static void
2413expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2414{
2415 treebuilder_handle_namespace(
2416 (TreeBuilderObject*) self->target, 0, NULL, NULL
2417 );
2418}
2419
2420static void
2421expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2422{
2423 PyObject* comment;
2424 PyObject* res;
2425
2426 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002427 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002428 if (comment) {
2429 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2430 Py_XDECREF(res);
2431 Py_DECREF(comment);
2432 }
2433 }
2434}
2435
2436static void
2437expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2438 const XML_Char* data_in)
2439{
2440 PyObject* target;
2441 PyObject* data;
2442 PyObject* res;
2443
2444 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002445 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2446 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002447 if (target && data) {
2448 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2449 Py_XDECREF(res);
2450 Py_DECREF(data);
2451 Py_DECREF(target);
2452 } else {
2453 Py_XDECREF(data);
2454 Py_XDECREF(target);
2455 }
2456 }
2457}
2458
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002459static int
2460expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2461 XML_Encoding *info)
2462{
2463 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002464 unsigned char s[256];
2465 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002466 void *data;
2467 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002468
2469 memset(info, 0, sizeof(XML_Encoding));
2470
2471 for (i = 0; i < 256; i++)
2472 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002473
Fredrik Lundhc3389992005-12-25 11:40:19 +00002474 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002475 if (!u)
2476 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002477 if (PyUnicode_READY(u))
2478 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002479
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002480 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002481 Py_DECREF(u);
2482 return XML_STATUS_ERROR;
2483 }
2484
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002485 kind = PyUnicode_KIND(u);
2486 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002487 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002488 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2489 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2490 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002491 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002492 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002493 }
2494
2495 Py_DECREF(u);
2496
2497 return XML_STATUS_OK;
2498}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002499
2500/* -------------------------------------------------------------------- */
2501/* constructor and destructor */
2502
2503static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002504xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002505{
2506 XMLParserObject* self;
2507 /* FIXME: does this need to be static? */
2508 static XML_Memory_Handling_Suite memory_handler;
2509
2510 PyObject* target = NULL;
2511 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002512 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002513 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2514 &target, &encoding))
2515 return NULL;
2516
2517#if defined(USE_PYEXPAT_CAPI)
2518 if (!expat_capi) {
2519 PyErr_SetString(
2520 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2521 );
2522 return NULL;
2523 }
2524#endif
2525
2526 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2527 if (self == NULL)
2528 return NULL;
2529
2530 self->entity = PyDict_New();
2531 if (!self->entity) {
2532 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002533 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002534 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002535
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002536 self->names = PyDict_New();
2537 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002538 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002539 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002540 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002541 }
2542
2543 memory_handler.malloc_fcn = PyObject_Malloc;
2544 memory_handler.realloc_fcn = PyObject_Realloc;
2545 memory_handler.free_fcn = PyObject_Free;
2546
2547 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2548 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002549 PyObject_Del(self->names);
2550 PyObject_Del(self->entity);
2551 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002552 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002553 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002554 }
2555
2556 /* setup target handlers */
2557 if (!target) {
2558 target = treebuilder_new();
2559 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002560 EXPAT(ParserFree)(self->parser);
2561 PyObject_Del(self->names);
2562 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002563 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002564 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002565 }
2566 } else
2567 Py_INCREF(target);
2568 self->target = target;
2569
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002570 self->handle_start = PyObject_GetAttrString(target, "start");
2571 self->handle_data = PyObject_GetAttrString(target, "data");
2572 self->handle_end = PyObject_GetAttrString(target, "end");
2573 self->handle_comment = PyObject_GetAttrString(target, "comment");
2574 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002575 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002576
2577 PyErr_Clear();
2578
2579 /* configure parser */
2580 EXPAT(SetUserData)(self->parser, self);
2581 EXPAT(SetElementHandler)(
2582 self->parser,
2583 (XML_StartElementHandler) expat_start_handler,
2584 (XML_EndElementHandler) expat_end_handler
2585 );
2586 EXPAT(SetDefaultHandlerExpand)(
2587 self->parser,
2588 (XML_DefaultHandler) expat_default_handler
2589 );
2590 EXPAT(SetCharacterDataHandler)(
2591 self->parser,
2592 (XML_CharacterDataHandler) expat_data_handler
2593 );
2594 if (self->handle_comment)
2595 EXPAT(SetCommentHandler)(
2596 self->parser,
2597 (XML_CommentHandler) expat_comment_handler
2598 );
2599 if (self->handle_pi)
2600 EXPAT(SetProcessingInstructionHandler)(
2601 self->parser,
2602 (XML_ProcessingInstructionHandler) expat_pi_handler
2603 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002604 EXPAT(SetUnknownEncodingHandler)(
2605 self->parser,
2606 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2607 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002608
2609 ALLOC(sizeof(XMLParserObject), "create expatparser");
2610
2611 return (PyObject*) self;
2612}
2613
2614static void
2615xmlparser_dealloc(XMLParserObject* self)
2616{
2617 EXPAT(ParserFree)(self->parser);
2618
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002619 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002620 Py_XDECREF(self->handle_pi);
2621 Py_XDECREF(self->handle_comment);
2622 Py_XDECREF(self->handle_end);
2623 Py_XDECREF(self->handle_data);
2624 Py_XDECREF(self->handle_start);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002625
2626 Py_DECREF(self->target);
2627 Py_DECREF(self->entity);
2628 Py_DECREF(self->names);
2629
2630 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2631
2632 PyObject_Del(self);
2633}
2634
2635/* -------------------------------------------------------------------- */
2636/* methods (in alphabetical order) */
2637
2638LOCAL(PyObject*)
2639expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2640{
2641 int ok;
2642
2643 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2644
2645 if (PyErr_Occurred())
2646 return NULL;
2647
2648 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002649 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002650 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002651 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002652 EXPAT(GetErrorColumnNumber)(self->parser),
2653 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002654 );
2655 return NULL;
2656 }
2657
2658 Py_RETURN_NONE;
2659}
2660
2661static PyObject*
2662xmlparser_close(XMLParserObject* self, PyObject* args)
2663{
2664 /* end feeding data to parser */
2665
2666 PyObject* res;
2667 if (!PyArg_ParseTuple(args, ":close"))
2668 return NULL;
2669
2670 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002671 if (!res)
2672 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002673
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002674 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002675 Py_DECREF(res);
2676 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002677 } if (self->handle_close) {
2678 Py_DECREF(res);
2679 return PyObject_CallFunction(self->handle_close, "");
2680 } else
2681 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002682}
2683
2684static PyObject*
2685xmlparser_feed(XMLParserObject* self, PyObject* args)
2686{
2687 /* feed data to parser */
2688
2689 char* data;
2690 int data_len;
2691 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2692 return NULL;
2693
2694 return expat_parse(self, data, data_len, 0);
2695}
2696
2697static PyObject*
2698xmlparser_parse(XMLParserObject* self, PyObject* args)
2699{
2700 /* (internal) parse until end of input stream */
2701
2702 PyObject* reader;
2703 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02002704 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002705 PyObject* res;
2706
2707 PyObject* fileobj;
2708 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2709 return NULL;
2710
2711 reader = PyObject_GetAttrString(fileobj, "read");
2712 if (!reader)
2713 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002714
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002715 /* read from open file object */
2716 for (;;) {
2717
2718 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2719
2720 if (!buffer) {
2721 /* read failed (e.g. due to KeyboardInterrupt) */
2722 Py_DECREF(reader);
2723 return NULL;
2724 }
2725
Eli Benderskyf996e772012-03-16 05:53:30 +02002726 if (PyUnicode_CheckExact(buffer)) {
2727 /* A unicode object is encoded into bytes using UTF-8 */
2728 if (PyUnicode_GET_SIZE(buffer) == 0) {
2729 Py_DECREF(buffer);
2730 break;
2731 }
2732 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
2733 if (!temp) {
2734 /* Propagate exception from PyUnicode_AsEncodedString */
2735 Py_DECREF(buffer);
2736 Py_DECREF(reader);
2737 return NULL;
2738 }
2739
2740 /* Here we no longer need the original buffer since it contains
2741 * unicode. Make it point to the encoded bytes object.
2742 */
2743 Py_DECREF(buffer);
2744 buffer = temp;
2745 }
2746 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002747 Py_DECREF(buffer);
2748 break;
2749 }
2750
2751 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002752 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753 );
2754
2755 Py_DECREF(buffer);
2756
2757 if (!res) {
2758 Py_DECREF(reader);
2759 return NULL;
2760 }
2761 Py_DECREF(res);
2762
2763 }
2764
2765 Py_DECREF(reader);
2766
2767 res = expat_parse(self, "", 0, 1);
2768
2769 if (res && TreeBuilder_CheckExact(self->target)) {
2770 Py_DECREF(res);
2771 return treebuilder_done((TreeBuilderObject*) self->target);
2772 }
2773
2774 return res;
2775}
2776
2777static PyObject*
2778xmlparser_setevents(XMLParserObject* self, PyObject* args)
2779{
2780 /* activate element event reporting */
2781
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002782 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002783 TreeBuilderObject* target;
2784
2785 PyObject* events; /* event collector */
2786 PyObject* event_set = Py_None;
2787 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2788 &event_set))
2789 return NULL;
2790
2791 if (!TreeBuilder_CheckExact(self->target)) {
2792 PyErr_SetString(
2793 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01002794 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002795 "targets"
2796 );
2797 return NULL;
2798 }
2799
2800 target = (TreeBuilderObject*) self->target;
2801
2802 Py_INCREF(events);
2803 Py_XDECREF(target->events);
2804 target->events = events;
2805
2806 /* clear out existing events */
2807 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2808 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2809 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2810 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2811
2812 if (event_set == Py_None) {
2813 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002814 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002815 Py_RETURN_NONE;
2816 }
2817
2818 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2819 goto error;
2820
2821 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2822 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2823 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002824 if (PyUnicode_Check(item)) {
2825 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002826 if (event == NULL)
2827 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002828 } else if (PyBytes_Check(item))
2829 event = PyBytes_AS_STRING(item);
2830 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002831 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002832 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002833 if (strcmp(event, "start") == 0) {
2834 Py_INCREF(item);
2835 target->start_event_obj = item;
2836 } else if (strcmp(event, "end") == 0) {
2837 Py_INCREF(item);
2838 Py_XDECREF(target->end_event_obj);
2839 target->end_event_obj = item;
2840 } else if (strcmp(event, "start-ns") == 0) {
2841 Py_INCREF(item);
2842 Py_XDECREF(target->start_ns_event_obj);
2843 target->start_ns_event_obj = item;
2844 EXPAT(SetNamespaceDeclHandler)(
2845 self->parser,
2846 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2847 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2848 );
2849 } else if (strcmp(event, "end-ns") == 0) {
2850 Py_INCREF(item);
2851 Py_XDECREF(target->end_ns_event_obj);
2852 target->end_ns_event_obj = item;
2853 EXPAT(SetNamespaceDeclHandler)(
2854 self->parser,
2855 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2856 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2857 );
2858 } else {
2859 PyErr_Format(
2860 PyExc_ValueError,
2861 "unknown event '%s'", event
2862 );
2863 return NULL;
2864 }
2865 }
2866
2867 Py_RETURN_NONE;
2868
2869 error:
2870 PyErr_SetString(
2871 PyExc_TypeError,
2872 "invalid event tuple"
2873 );
2874 return NULL;
2875}
2876
2877static PyMethodDef xmlparser_methods[] = {
2878 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2879 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2880 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2881 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2882 {NULL, NULL}
2883};
2884
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002885static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002886xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002887{
Alexander Belopolskye239d232010-12-08 23:31:48 +00002888 if (PyUnicode_Check(nameobj)) {
2889 PyObject* res;
2890 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
2891 res = self->entity;
2892 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
2893 res = self->target;
2894 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
2895 return PyUnicode_FromFormat(
2896 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002897 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00002898 }
2899 else
2900 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002901
Alexander Belopolskye239d232010-12-08 23:31:48 +00002902 Py_INCREF(res);
2903 return res;
2904 }
2905 generic:
2906 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002907}
2908
Neal Norwitz227b5332006-03-22 09:28:35 +00002909static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002910 PyVarObject_HEAD_INIT(NULL, 0)
2911 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002912 /* methods */
2913 (destructor)xmlparser_dealloc, /* tp_dealloc */
2914 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002915 0, /* tp_getattr */
2916 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002917 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002918 0, /* tp_repr */
2919 0, /* tp_as_number */
2920 0, /* tp_as_sequence */
2921 0, /* tp_as_mapping */
2922 0, /* tp_hash */
2923 0, /* tp_call */
2924 0, /* tp_str */
2925 (getattrofunc)xmlparser_getattro, /* tp_getattro */
2926 0, /* tp_setattro */
2927 0, /* tp_as_buffer */
2928 Py_TPFLAGS_DEFAULT, /* tp_flags */
2929 0, /* tp_doc */
2930 0, /* tp_traverse */
2931 0, /* tp_clear */
2932 0, /* tp_richcompare */
2933 0, /* tp_weaklistoffset */
2934 0, /* tp_iter */
2935 0, /* tp_iternext */
2936 xmlparser_methods, /* tp_methods */
2937 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002938};
2939
2940#endif
2941
2942/* ==================================================================== */
2943/* python module interface */
2944
2945static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002946 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2947 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2948#if defined(USE_EXPAT)
2949 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002950#endif
2951 {NULL, NULL}
2952};
2953
Martin v. Löwis1a214512008-06-11 05:26:20 +00002954
2955static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002956 PyModuleDef_HEAD_INIT,
2957 "_elementtree",
2958 NULL,
2959 -1,
2960 _functions,
2961 NULL,
2962 NULL,
2963 NULL,
2964 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002965};
2966
Neal Norwitzf6657e62006-12-28 04:47:50 +00002967PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002968PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002969{
2970 PyObject* m;
2971 PyObject* g;
2972 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002973
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002974 /* Initialize object types */
2975 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002976 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002977 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002978 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002979#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002980 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002981 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002982#endif
2983
Martin v. Löwis1a214512008-06-11 05:26:20 +00002984 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002985 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002986 return NULL;
2987
2988 /* The code below requires that the module gets already added
2989 to sys.modules. */
2990 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002991 _elementtreemodule.m_name,
2992 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002993
2994 /* python glue code */
2995
2996 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002997 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002998 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002999
3000 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
3001
3002 bootstrap = (
3003
Florent Xiclunaf4bdf4e2012-02-11 11:28:16 +01003004 "from copy import deepcopy\n"
3005 "from xml.etree import ElementPath\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003006
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003007 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008 " if tag == '*':\n"
3009 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003010 " if tag is None or node.tag == tag:\n"
3011 " yield node\n"
3012 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003013 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003014 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003015
3016 "def itertext(node):\n" /* helper */
3017 " if node.text:\n"
3018 " yield node.text\n"
3019 " for e in node:\n"
3020 " for s in e.itertext():\n"
3021 " yield s\n"
3022 " if e.tail:\n"
3023 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003024
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003025 );
3026
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003027 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3028 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003029
3030 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003031 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003032 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3033 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003034
3035#if defined(USE_PYEXPAT_CAPI)
3036 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003037 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3038 if (expat_capi) {
3039 /* check that it's usable */
3040 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3041 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3042 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3043 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3044 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3045 expat_capi = NULL;
3046 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003047#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003048
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003049 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003050 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003051 );
3052 Py_INCREF(elementtree_parseerror_obj);
3053 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3054
Eli Bendersky092af1f2012-03-04 07:14:03 +02003055 Py_INCREF((PyObject *)&Element_Type);
3056 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3057
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003058 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003059}