blob: a9da3ec18156d161259ba6780ed88c992ac75f48 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Leave defined to include the expat-based XMLParser type */
#define USE_EXPAT

/* Define to do all expat calls via pyexpat's embedded expat library */
/* #define USE_PYEXPAT_CAPI */

/* Number of child slots embedded directly in an element's extra block;
   an element can hold this many children without extra memory
   allocations. */
#define STATIC_CHILDREN 4

/* Tuning notes for STATIC_CHILDREN:

   - For best performance, choose a value so that 80-90% of all nodes
     have no more than the given number of children.  Set this to zero
     to minimize the size of the element structure itself (this only
     helps if you have lots of leaf nodes with attributes).

   - pymalloc always allocates blocks in multiples of eight bytes.  For
     the current C version of ElementTree, this means that the number of
     children should be an even number, at least on 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
/* Optional allocation tracing.  With the condition below switched on,
   ALLOC and RELEASE keep a running byte count in `memory` and print it
   together with the comment string; otherwise both expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
89
/* compiler tweaks: LOCAL(type) declares a file-local helper returning
   `type`.  Under MSVC the helper is additionally marked __inline and
   __fastcall; everywhere else it is a plain static function. */
#ifdef _MSC_VER
#  define LOCAL(type) static __inline type __fastcall
#else
#  define LOCAL(type) static type
#endif
96
/* Macros used to store a 'join' flag in the lowest bit of a string
   object pointer.  Every use of text and tail as object pointers must
   go through JOIN_OBJ; see the comments in the ElementObject definition
   for more info.

   JOIN_OBJ(p)        the object pointer with the flag bit masked off
   JOIN_GET(p)        the flag bit (0 or 1)
   JOIN_SET(p, flag)  the pointer from JOIN_OBJ(p) with `flag` or-ed in */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t) 1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
104
105/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000106static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000107static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000108static PyObject* elementtree_iter_obj;
109static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000110static PyObject* elementpath_obj;
111
112/* helpers */
113
114LOCAL(PyObject*)
115deepcopy(PyObject* object, PyObject* memo)
116{
117 /* do a deep copy of the given object */
118
119 PyObject* args;
120 PyObject* result;
121
122 if (!elementtree_deepcopy_obj) {
123 PyErr_SetString(
124 PyExc_RuntimeError,
125 "deepcopy helper not found"
126 );
127 return NULL;
128 }
129
130 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000131 if (!args)
132 return NULL;
133
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000134 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
135 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
136
137 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
138
139 Py_DECREF(args);
140
141 return result;
142}
143
144LOCAL(PyObject*)
145list_join(PyObject* list)
146{
147 /* join list elements (destroying the list in the process) */
148
149 PyObject* joiner;
150 PyObject* function;
151 PyObject* args;
152 PyObject* result;
153
154 switch (PyList_GET_SIZE(list)) {
155 case 0:
156 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000157 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000158 case 1:
159 result = PyList_GET_ITEM(list, 0);
160 Py_INCREF(result);
161 Py_DECREF(list);
162 return result;
163 }
164
165 /* two or more elements: slice out a suitable separator from the
166 first member, and use that to join the entire list */
167
168 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
169 if (!joiner)
170 return NULL;
171
172 function = PyObject_GetAttrString(joiner, "join");
173 if (!function) {
174 Py_DECREF(joiner);
175 return NULL;
176 }
177
178 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000179 if (!args)
180 return NULL;
181
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000182 PyTuple_SET_ITEM(args, 0, list);
183
184 result = PyObject_CallObject(function, args);
185
186 Py_DECREF(args); /* also removes list */
187 Py_DECREF(function);
188 Py_DECREF(joiner);
189
190 return result;
191}
192
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000193/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200194/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000195
196typedef struct {
197
198 /* attributes (a dictionary object), or None if no attributes */
199 PyObject* attrib;
200
201 /* child elements */
202 int length; /* actual number of items */
203 int allocated; /* allocated items */
204
205 /* this either points to _children or to a malloced buffer */
206 PyObject* *children;
207
208 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100209
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210} ElementObjectExtra;
211
212typedef struct {
213 PyObject_HEAD
214
215 /* element tag (a string). */
216 PyObject* tag;
217
218 /* text before first child. note that this is a tagged pointer;
219 use JOIN_OBJ to get the object pointer. the join flag is used
220 to distinguish lists created by the tree builder from lists
221 assigned to the attribute by application code; the former
222 should be joined before being returned to the user, the latter
223 should be left intact. */
224 PyObject* text;
225
226 /* text after this element, in parent. note that this is a tagged
227 pointer; use JOIN_OBJ to get the object pointer. */
228 PyObject* tail;
229
230 ElementObjectExtra* extra;
231
232} ElementObject;
233
Neal Norwitz227b5332006-03-22 09:28:35 +0000234static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235
Christian Heimes90aa7642007-12-19 02:45:37 +0000236#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000237
238/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200239/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000240
241LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200242create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000243{
244 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
245 if (!self->extra)
246 return -1;
247
248 if (!attrib)
249 attrib = Py_None;
250
251 Py_INCREF(attrib);
252 self->extra->attrib = attrib;
253
254 self->extra->length = 0;
255 self->extra->allocated = STATIC_CHILDREN;
256 self->extra->children = self->extra->_children;
257
258 return 0;
259}
260
261LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200262dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000263{
264 int i;
265
266 Py_DECREF(self->extra->attrib);
267
268 for (i = 0; i < self->extra->length; i++)
269 Py_DECREF(self->extra->children[i]);
270
271 if (self->extra->children != self->extra->_children)
272 PyObject_Free(self->extra->children);
273
274 PyObject_Free(self->extra);
275}
276
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277/* Convenience internal function to create new Element objects with the given
278 * tag and attributes.
279*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200281create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282{
283 ElementObject* self;
284
Eli Bendersky0192ba32012-03-30 16:38:33 +0300285 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 if (self == NULL)
287 return NULL;
288
289 /* use None for empty dictionaries */
290 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
291 attrib = Py_None;
292
293 self->extra = NULL;
294
295 if (attrib != Py_None) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200296 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000297 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000298 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000299 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000300 }
301
302 Py_INCREF(tag);
303 self->tag = tag;
304
305 Py_INCREF(Py_None);
306 self->text = Py_None;
307
308 Py_INCREF(Py_None);
309 self->tail = Py_None;
310
311 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300312 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000313 return (PyObject*) self;
314}
315
Eli Bendersky092af1f2012-03-04 07:14:03 +0200316static PyObject *
317element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
318{
319 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
320 if (e != NULL) {
321 Py_INCREF(Py_None);
322 e->tag = Py_None;
323
324 Py_INCREF(Py_None);
325 e->text = Py_None;
326
327 Py_INCREF(Py_None);
328 e->tail = Py_None;
329
330 e->extra = NULL;
331 }
332 return (PyObject *)e;
333}
334
335static int
336element_init(PyObject *self, PyObject *args, PyObject *kwds)
337{
338 PyObject *tag;
339 PyObject *tmp;
340 PyObject *attrib = NULL;
341 ElementObject *self_elem;
342
343 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
344 return -1;
345
346 if (attrib || kwds) {
347 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
348 if (!attrib)
349 return -1;
350 if (kwds)
351 PyDict_Update(attrib, kwds);
352 } else {
353 Py_INCREF(Py_None);
354 attrib = Py_None;
355 }
356
357 self_elem = (ElementObject *)self;
358
359 /* Use None for empty dictionaries */
360 if (PyDict_CheckExact(attrib) && PyDict_Size(attrib) == 0) {
361 Py_INCREF(Py_None);
362 attrib = Py_None;
363 }
364
365 if (attrib != Py_None) {
366 if (create_extra(self_elem, attrib) < 0) {
367 PyObject_Del(self_elem);
368 return -1;
369 }
370 }
371
372 /* If create_extra needed attrib, it took a reference to it, so we can
373 * release ours anyway.
374 */
375 Py_DECREF(attrib);
376
377 /* Replace the objects already pointed to by tag, text and tail. */
378 tmp = self_elem->tag;
379 self_elem->tag = tag;
380 Py_INCREF(tag);
381 Py_DECREF(tmp);
382
383 tmp = self_elem->text;
384 self_elem->text = Py_None;
385 Py_INCREF(Py_None);
386 Py_DECREF(JOIN_OBJ(tmp));
387
388 tmp = self_elem->tail;
389 self_elem->tail = Py_None;
390 Py_INCREF(Py_None);
391 Py_DECREF(JOIN_OBJ(tmp));
392
393 return 0;
394}
395
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000396LOCAL(int)
397element_resize(ElementObject* self, int extra)
398{
399 int size;
400 PyObject* *children;
401
402 /* make sure self->children can hold the given number of extra
403 elements. set an exception and return -1 if allocation failed */
404
405 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000407
408 size = self->extra->length + extra;
409
410 if (size > self->extra->allocated) {
411 /* use Python 2.4's list growth strategy */
412 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000413 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100414 * which needs at least 4 bytes.
415 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000416 * be safe.
417 */
418 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000420 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100421 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000422 * false alarm always assume at least one child to be safe.
423 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000424 children = PyObject_Realloc(self->extra->children,
425 size * sizeof(PyObject*));
426 if (!children)
427 goto nomemory;
428 } else {
429 children = PyObject_Malloc(size * sizeof(PyObject*));
430 if (!children)
431 goto nomemory;
432 /* copy existing children from static area to malloc buffer */
433 memcpy(children, self->extra->children,
434 self->extra->length * sizeof(PyObject*));
435 }
436 self->extra->children = children;
437 self->extra->allocated = size;
438 }
439
440 return 0;
441
442 nomemory:
443 PyErr_NoMemory();
444 return -1;
445}
446
447LOCAL(int)
448element_add_subelement(ElementObject* self, PyObject* element)
449{
450 /* add a child element to a parent */
451
452 if (element_resize(self, 1) < 0)
453 return -1;
454
455 Py_INCREF(element);
456 self->extra->children[self->extra->length] = element;
457
458 self->extra->length++;
459
460 return 0;
461}
462
463LOCAL(PyObject*)
464element_get_attrib(ElementObject* self)
465{
466 /* return borrowed reference to attrib dictionary */
467 /* note: this function assumes that the extra section exists */
468
469 PyObject* res = self->extra->attrib;
470
471 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000472 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 /* create missing dictionary */
474 res = PyDict_New();
475 if (!res)
476 return NULL;
477 self->extra->attrib = res;
478 }
479
480 return res;
481}
482
483LOCAL(PyObject*)
484element_get_text(ElementObject* self)
485{
486 /* return borrowed reference to text attribute */
487
488 PyObject* res = self->text;
489
490 if (JOIN_GET(res)) {
491 res = JOIN_OBJ(res);
492 if (PyList_CheckExact(res)) {
493 res = list_join(res);
494 if (!res)
495 return NULL;
496 self->text = res;
497 }
498 }
499
500 return res;
501}
502
503LOCAL(PyObject*)
504element_get_tail(ElementObject* self)
505{
506 /* return borrowed reference to text attribute */
507
508 PyObject* res = self->tail;
509
510 if (JOIN_GET(res)) {
511 res = JOIN_OBJ(res);
512 if (PyList_CheckExact(res)) {
513 res = list_join(res);
514 if (!res)
515 return NULL;
516 self->tail = res;
517 }
518 }
519
520 return res;
521}
522
523static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000524subelement(PyObject* self, PyObject* args, PyObject* kw)
525{
526 PyObject* elem;
527
528 ElementObject* parent;
529 PyObject* tag;
530 PyObject* attrib = NULL;
531 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
532 &Element_Type, &parent, &tag,
533 &PyDict_Type, &attrib))
534 return NULL;
535
536 if (attrib || kw) {
537 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
538 if (!attrib)
539 return NULL;
540 if (kw)
541 PyDict_Update(attrib, kw);
542 } else {
543 Py_INCREF(Py_None);
544 attrib = Py_None;
545 }
546
Eli Bendersky092af1f2012-03-04 07:14:03 +0200547 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000548
549 Py_DECREF(attrib);
550
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000551 if (element_add_subelement(parent, elem) < 0) {
552 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000553 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000554 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000555
556 return elem;
557}
558
Eli Bendersky0192ba32012-03-30 16:38:33 +0300559static int
560element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
561{
562 Py_VISIT(self->tag);
563 Py_VISIT(JOIN_OBJ(self->text));
564 Py_VISIT(JOIN_OBJ(self->tail));
565
566 if (self->extra) {
567 int i;
568 Py_VISIT(self->extra->attrib);
569
570 for (i = 0; i < self->extra->length; ++i)
571 Py_VISIT(self->extra->children[i]);
572 }
573 return 0;
574}
575
576static int
577element_gc_clear(ElementObject *self)
578{
579 PyObject *text = JOIN_OBJ(self->text);
580 PyObject *tail = JOIN_OBJ(self->tail);
581 Py_CLEAR(self->tag);
582 Py_CLEAR(text);
583 Py_CLEAR(tail);
584
585 /* After dropping all references from extra, it's no longer valid anyway,
586 ** so fully deallocate it (see also element_clearmethod)
587 */
588 if (self->extra) {
589 dealloc_extra(self);
590 self->extra = NULL;
591 }
592 return 0;
593}
594
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000595static void
596element_dealloc(ElementObject* self)
597{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300598 PyObject_GC_UnTrack(self);
599 /* element_gc_clear clears all references and deallocates extra
600 */
601 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602
603 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200604 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000605}
606
607/* -------------------------------------------------------------------- */
608/* methods (in alphabetical order) */
609
610static PyObject*
611element_append(ElementObject* self, PyObject* args)
612{
613 PyObject* element;
614 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
615 return NULL;
616
617 if (element_add_subelement(self, element) < 0)
618 return NULL;
619
620 Py_RETURN_NONE;
621}
622
623static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300624element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625{
626 if (!PyArg_ParseTuple(args, ":clear"))
627 return NULL;
628
629 if (self->extra) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200630 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000631 self->extra = NULL;
632 }
633
634 Py_INCREF(Py_None);
635 Py_DECREF(JOIN_OBJ(self->text));
636 self->text = Py_None;
637
638 Py_INCREF(Py_None);
639 Py_DECREF(JOIN_OBJ(self->tail));
640 self->tail = Py_None;
641
642 Py_RETURN_NONE;
643}
644
645static PyObject*
646element_copy(ElementObject* self, PyObject* args)
647{
648 int i;
649 ElementObject* element;
650
651 if (!PyArg_ParseTuple(args, ":__copy__"))
652 return NULL;
653
Eli Bendersky092af1f2012-03-04 07:14:03 +0200654 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000655 self->tag, (self->extra) ? self->extra->attrib : Py_None
656 );
657 if (!element)
658 return NULL;
659
660 Py_DECREF(JOIN_OBJ(element->text));
661 element->text = self->text;
662 Py_INCREF(JOIN_OBJ(element->text));
663
664 Py_DECREF(JOIN_OBJ(element->tail));
665 element->tail = self->tail;
666 Py_INCREF(JOIN_OBJ(element->tail));
667
668 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100669
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000670 if (element_resize(element, self->extra->length) < 0) {
671 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000672 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000673 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000674
675 for (i = 0; i < self->extra->length; i++) {
676 Py_INCREF(self->extra->children[i]);
677 element->extra->children[i] = self->extra->children[i];
678 }
679
680 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100681
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000682 }
683
684 return (PyObject*) element;
685}
686
687static PyObject*
688element_deepcopy(ElementObject* self, PyObject* args)
689{
690 int i;
691 ElementObject* element;
692 PyObject* tag;
693 PyObject* attrib;
694 PyObject* text;
695 PyObject* tail;
696 PyObject* id;
697
698 PyObject* memo;
699 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
700 return NULL;
701
702 tag = deepcopy(self->tag, memo);
703 if (!tag)
704 return NULL;
705
706 if (self->extra) {
707 attrib = deepcopy(self->extra->attrib, memo);
708 if (!attrib) {
709 Py_DECREF(tag);
710 return NULL;
711 }
712 } else {
713 Py_INCREF(Py_None);
714 attrib = Py_None;
715 }
716
Eli Bendersky092af1f2012-03-04 07:14:03 +0200717 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000718
719 Py_DECREF(tag);
720 Py_DECREF(attrib);
721
722 if (!element)
723 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100724
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725 text = deepcopy(JOIN_OBJ(self->text), memo);
726 if (!text)
727 goto error;
728 Py_DECREF(element->text);
729 element->text = JOIN_SET(text, JOIN_GET(self->text));
730
731 tail = deepcopy(JOIN_OBJ(self->tail), memo);
732 if (!tail)
733 goto error;
734 Py_DECREF(element->tail);
735 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
736
737 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100738
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739 if (element_resize(element, self->extra->length) < 0)
740 goto error;
741
742 for (i = 0; i < self->extra->length; i++) {
743 PyObject* child = deepcopy(self->extra->children[i], memo);
744 if (!child) {
745 element->extra->length = i;
746 goto error;
747 }
748 element->extra->children[i] = child;
749 }
750
751 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 }
754
755 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000756 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000757 if (!id)
758 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000759
760 i = PyDict_SetItem(memo, id, (PyObject*) element);
761
762 Py_DECREF(id);
763
764 if (i < 0)
765 goto error;
766
767 return (PyObject*) element;
768
769 error:
770 Py_DECREF(element);
771 return NULL;
772}
773
774LOCAL(int)
775checkpath(PyObject* tag)
776{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000777 Py_ssize_t i;
778 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000779
780 /* check if a tag contains an xpath character */
781
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000782#define PATHCHAR(ch) \
783 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000785 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200786 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
787 void *data = PyUnicode_DATA(tag);
788 unsigned int kind = PyUnicode_KIND(tag);
789 for (i = 0; i < len; i++) {
790 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
791 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000792 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200793 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000794 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200795 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000796 return 1;
797 }
798 return 0;
799 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000800 if (PyBytes_Check(tag)) {
801 char *p = PyBytes_AS_STRING(tag);
802 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000803 if (p[i] == '{')
804 check = 0;
805 else if (p[i] == '}')
806 check = 1;
807 else if (check && PATHCHAR(p[i]))
808 return 1;
809 }
810 return 0;
811 }
812
813 return 1; /* unknown type; might be path expression */
814}
815
816static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000817element_extend(ElementObject* self, PyObject* args)
818{
819 PyObject* seq;
820 Py_ssize_t i, seqlen = 0;
821
822 PyObject* seq_in;
823 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
824 return NULL;
825
826 seq = PySequence_Fast(seq_in, "");
827 if (!seq) {
828 PyErr_Format(
829 PyExc_TypeError,
830 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
831 );
832 return NULL;
833 }
834
835 seqlen = PySequence_Size(seq);
836 for (i = 0; i < seqlen; i++) {
837 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +0200838 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
839 Py_DECREF(seq);
840 PyErr_Format(
841 PyExc_TypeError,
842 "expected an Element, not \"%.200s\"",
843 Py_TYPE(element)->tp_name);
844 return NULL;
845 }
846
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000847 if (element_add_subelement(self, element) < 0) {
848 Py_DECREF(seq);
849 return NULL;
850 }
851 }
852
853 Py_DECREF(seq);
854
855 Py_RETURN_NONE;
856}
857
858static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000859element_find(ElementObject* self, PyObject* args)
860{
861 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000862 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000863 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200864
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000865 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000866 return NULL;
867
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200868 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200869 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200870 return _PyObject_CallMethodId(
871 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000872 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200873 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000874
875 if (!self->extra)
876 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100877
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000878 for (i = 0; i < self->extra->length; i++) {
879 PyObject* item = self->extra->children[i];
880 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000881 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000882 Py_INCREF(item);
883 return item;
884 }
885 }
886
887 Py_RETURN_NONE;
888}
889
890static PyObject*
891element_findtext(ElementObject* self, PyObject* args)
892{
893 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000894 PyObject* tag;
895 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000896 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200897 _Py_IDENTIFIER(findtext);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200898
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000899 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000900 return NULL;
901
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000902 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200903 return _PyObject_CallMethodId(
904 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000905 );
906
907 if (!self->extra) {
908 Py_INCREF(default_value);
909 return default_value;
910 }
911
912 for (i = 0; i < self->extra->length; i++) {
913 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000914 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
915
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000916 PyObject* text = element_get_text(item);
917 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000918 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000919 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000920 return text;
921 }
922 }
923
924 Py_INCREF(default_value);
925 return default_value;
926}
927
928static PyObject*
929element_findall(ElementObject* self, PyObject* args)
930{
931 int i;
932 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000933 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000934 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200935
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000936 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000937 return NULL;
938
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200939 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200940 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200941 return _PyObject_CallMethodId(
942 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000943 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200944 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000945
946 out = PyList_New(0);
947 if (!out)
948 return NULL;
949
950 if (!self->extra)
951 return out;
952
953 for (i = 0; i < self->extra->length; i++) {
954 PyObject* item = self->extra->children[i];
955 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000956 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000957 if (PyList_Append(out, item) < 0) {
958 Py_DECREF(out);
959 return NULL;
960 }
961 }
962 }
963
964 return out;
965}
966
967static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000968element_iterfind(ElementObject* self, PyObject* args)
969{
970 PyObject* tag;
971 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200972 _Py_IDENTIFIER(iterfind);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200973
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000974 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
975 return NULL;
976
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200977 return _PyObject_CallMethodId(
978 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000979 );
980}
981
982static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000983element_get(ElementObject* self, PyObject* args)
984{
985 PyObject* value;
986
987 PyObject* key;
988 PyObject* default_value = Py_None;
989 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
990 return NULL;
991
992 if (!self->extra || self->extra->attrib == Py_None)
993 value = default_value;
994 else {
995 value = PyDict_GetItem(self->extra->attrib, key);
996 if (!value)
997 value = default_value;
998 }
999
1000 Py_INCREF(value);
1001 return value;
1002}
1003
1004static PyObject*
1005element_getchildren(ElementObject* self, PyObject* args)
1006{
1007 int i;
1008 PyObject* list;
1009
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001010 /* FIXME: report as deprecated? */
1011
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001012 if (!PyArg_ParseTuple(args, ":getchildren"))
1013 return NULL;
1014
1015 if (!self->extra)
1016 return PyList_New(0);
1017
1018 list = PyList_New(self->extra->length);
1019 if (!list)
1020 return NULL;
1021
1022 for (i = 0; i < self->extra->length; i++) {
1023 PyObject* item = self->extra->children[i];
1024 Py_INCREF(item);
1025 PyList_SET_ITEM(list, i, item);
1026 }
1027
1028 return list;
1029}
1030
1031static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001032element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001033{
1034 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001035
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001036 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001037 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001038 return NULL;
1039
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001040 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001041 PyErr_SetString(
1042 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001043 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001044 );
1045 return NULL;
1046 }
1047
1048 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001049 if (!args)
1050 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001051
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001052 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1053 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1054
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001055 result = PyObject_CallObject(elementtree_iter_obj, args);
1056
1057 Py_DECREF(args);
1058
1059 return result;
1060}
1061
1062
1063static PyObject*
1064element_itertext(ElementObject* self, PyObject* args)
1065{
1066 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001067
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001068 if (!PyArg_ParseTuple(args, ":itertext"))
1069 return NULL;
1070
1071 if (!elementtree_itertext_obj) {
1072 PyErr_SetString(
1073 PyExc_RuntimeError,
1074 "itertext helper not found"
1075 );
1076 return NULL;
1077 }
1078
1079 args = PyTuple_New(1);
1080 if (!args)
1081 return NULL;
1082
1083 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1084
1085 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086
1087 Py_DECREF(args);
1088
1089 return result;
1090}
1091
1092static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001093element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001094{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001095 ElementObject* self = (ElementObject*) self_;
1096
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001097 if (!self->extra || index < 0 || index >= self->extra->length) {
1098 PyErr_SetString(
1099 PyExc_IndexError,
1100 "child index out of range"
1101 );
1102 return NULL;
1103 }
1104
1105 Py_INCREF(self->extra->children[index]);
1106 return self->extra->children[index];
1107}
1108
1109static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001110element_insert(ElementObject* self, PyObject* args)
1111{
1112 int i;
1113
1114 int index;
1115 PyObject* element;
1116 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1117 &Element_Type, &element))
1118 return NULL;
1119
1120 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001121 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001122
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001123 if (index < 0) {
1124 index += self->extra->length;
1125 if (index < 0)
1126 index = 0;
1127 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001128 if (index > self->extra->length)
1129 index = self->extra->length;
1130
1131 if (element_resize(self, 1) < 0)
1132 return NULL;
1133
1134 for (i = self->extra->length; i > index; i--)
1135 self->extra->children[i] = self->extra->children[i-1];
1136
1137 Py_INCREF(element);
1138 self->extra->children[index] = element;
1139
1140 self->extra->length++;
1141
1142 Py_RETURN_NONE;
1143}
1144
1145static PyObject*
1146element_items(ElementObject* self, PyObject* args)
1147{
1148 if (!PyArg_ParseTuple(args, ":items"))
1149 return NULL;
1150
1151 if (!self->extra || self->extra->attrib == Py_None)
1152 return PyList_New(0);
1153
1154 return PyDict_Items(self->extra->attrib);
1155}
1156
1157static PyObject*
1158element_keys(ElementObject* self, PyObject* args)
1159{
1160 if (!PyArg_ParseTuple(args, ":keys"))
1161 return NULL;
1162
1163 if (!self->extra || self->extra->attrib == Py_None)
1164 return PyList_New(0);
1165
1166 return PyDict_Keys(self->extra->attrib);
1167}
1168
Martin v. Löwis18e16552006-02-15 17:27:45 +00001169static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001170element_length(ElementObject* self)
1171{
1172 if (!self->extra)
1173 return 0;
1174
1175 return self->extra->length;
1176}
1177
1178static PyObject*
1179element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1180{
1181 PyObject* elem;
1182
1183 PyObject* tag;
1184 PyObject* attrib;
1185 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1186 return NULL;
1187
1188 attrib = PyDict_Copy(attrib);
1189 if (!attrib)
1190 return NULL;
1191
Eli Bendersky092af1f2012-03-04 07:14:03 +02001192 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001193
1194 Py_DECREF(attrib);
1195
1196 return elem;
1197}
1198
1199static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001200element_remove(ElementObject* self, PyObject* args)
1201{
1202 int i;
1203
1204 PyObject* element;
1205 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1206 return NULL;
1207
1208 if (!self->extra) {
1209 /* element has no children, so raise exception */
1210 PyErr_SetString(
1211 PyExc_ValueError,
1212 "list.remove(x): x not in list"
1213 );
1214 return NULL;
1215 }
1216
1217 for (i = 0; i < self->extra->length; i++) {
1218 if (self->extra->children[i] == element)
1219 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001220 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001221 break;
1222 }
1223
1224 if (i == self->extra->length) {
1225 /* element is not in children, so raise exception */
1226 PyErr_SetString(
1227 PyExc_ValueError,
1228 "list.remove(x): x not in list"
1229 );
1230 return NULL;
1231 }
1232
1233 Py_DECREF(self->extra->children[i]);
1234
1235 self->extra->length--;
1236
1237 for (; i < self->extra->length; i++)
1238 self->extra->children[i] = self->extra->children[i+1];
1239
1240 Py_RETURN_NONE;
1241}
1242
1243static PyObject*
1244element_repr(ElementObject* self)
1245{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001246 if (self->tag)
1247 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1248 else
1249 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001250}
1251
1252static PyObject*
1253element_set(ElementObject* self, PyObject* args)
1254{
1255 PyObject* attrib;
1256
1257 PyObject* key;
1258 PyObject* value;
1259 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1260 return NULL;
1261
1262 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001263 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001264
1265 attrib = element_get_attrib(self);
1266 if (!attrib)
1267 return NULL;
1268
1269 if (PyDict_SetItem(attrib, key, value) < 0)
1270 return NULL;
1271
1272 Py_RETURN_NONE;
1273}
1274
1275static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001276element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001278 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001279 int i;
1280 PyObject* old;
1281
1282 if (!self->extra || index < 0 || index >= self->extra->length) {
1283 PyErr_SetString(
1284 PyExc_IndexError,
1285 "child assignment index out of range");
1286 return -1;
1287 }
1288
1289 old = self->extra->children[index];
1290
1291 if (item) {
1292 Py_INCREF(item);
1293 self->extra->children[index] = item;
1294 } else {
1295 self->extra->length--;
1296 for (i = index; i < self->extra->length; i++)
1297 self->extra->children[i] = self->extra->children[i+1];
1298 }
1299
1300 Py_DECREF(old);
1301
1302 return 0;
1303}
1304
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001305static PyObject*
1306element_subscr(PyObject* self_, PyObject* item)
1307{
1308 ElementObject* self = (ElementObject*) self_;
1309
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001310 if (PyIndex_Check(item)) {
1311 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001312
1313 if (i == -1 && PyErr_Occurred()) {
1314 return NULL;
1315 }
1316 if (i < 0 && self->extra)
1317 i += self->extra->length;
1318 return element_getitem(self_, i);
1319 }
1320 else if (PySlice_Check(item)) {
1321 Py_ssize_t start, stop, step, slicelen, cur, i;
1322 PyObject* list;
1323
1324 if (!self->extra)
1325 return PyList_New(0);
1326
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001327 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001328 self->extra->length,
1329 &start, &stop, &step, &slicelen) < 0) {
1330 return NULL;
1331 }
1332
1333 if (slicelen <= 0)
1334 return PyList_New(0);
1335 else {
1336 list = PyList_New(slicelen);
1337 if (!list)
1338 return NULL;
1339
1340 for (cur = start, i = 0; i < slicelen;
1341 cur += step, i++) {
1342 PyObject* item = self->extra->children[cur];
1343 Py_INCREF(item);
1344 PyList_SET_ITEM(list, i, item);
1345 }
1346
1347 return list;
1348 }
1349 }
1350 else {
1351 PyErr_SetString(PyExc_TypeError,
1352 "element indices must be integers");
1353 return NULL;
1354 }
1355}
1356
1357static int
1358element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1359{
1360 ElementObject* self = (ElementObject*) self_;
1361
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001362 if (PyIndex_Check(item)) {
1363 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001364
1365 if (i == -1 && PyErr_Occurred()) {
1366 return -1;
1367 }
1368 if (i < 0 && self->extra)
1369 i += self->extra->length;
1370 return element_setitem(self_, i, value);
1371 }
1372 else if (PySlice_Check(item)) {
1373 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1374
1375 PyObject* recycle = NULL;
1376 PyObject* seq = NULL;
1377
1378 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001379 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001380
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001381 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001382 self->extra->length,
1383 &start, &stop, &step, &slicelen) < 0) {
1384 return -1;
1385 }
1386
Eli Bendersky865756a2012-03-09 13:38:15 +02001387 if (value == NULL) {
1388 /* Delete slice */
1389 size_t cur;
1390 Py_ssize_t i;
1391
1392 if (slicelen <= 0)
1393 return 0;
1394
1395 /* Since we're deleting, the direction of the range doesn't matter,
1396 * so for simplicity make it always ascending.
1397 */
1398 if (step < 0) {
1399 stop = start + 1;
1400 start = stop + step * (slicelen - 1) - 1;
1401 step = -step;
1402 }
1403
1404 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1405
1406 /* recycle is a list that will contain all the children
1407 * scheduled for removal.
1408 */
1409 if (!(recycle = PyList_New(slicelen))) {
1410 PyErr_NoMemory();
1411 return -1;
1412 }
1413
1414 /* This loop walks over all the children that have to be deleted,
1415 * with cur pointing at them. num_moved is the amount of children
1416 * until the next deleted child that have to be "shifted down" to
1417 * occupy the deleted's places.
1418 * Note that in the ith iteration, shifting is done i+i places down
1419 * because i children were already removed.
1420 */
1421 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1422 /* Compute how many children have to be moved, clipping at the
1423 * list end.
1424 */
1425 Py_ssize_t num_moved = step - 1;
1426 if (cur + step >= (size_t)self->extra->length) {
1427 num_moved = self->extra->length - cur - 1;
1428 }
1429
1430 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1431
1432 memmove(
1433 self->extra->children + cur - i,
1434 self->extra->children + cur + 1,
1435 num_moved * sizeof(PyObject *));
1436 }
1437
1438 /* Leftover "tail" after the last removed child */
1439 cur = start + (size_t)slicelen * step;
1440 if (cur < (size_t)self->extra->length) {
1441 memmove(
1442 self->extra->children + cur - slicelen,
1443 self->extra->children + cur,
1444 (self->extra->length - cur) * sizeof(PyObject *));
1445 }
1446
1447 self->extra->length -= slicelen;
1448
1449 /* Discard the recycle list with all the deleted sub-elements */
1450 Py_XDECREF(recycle);
1451 return 0;
1452 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001453 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001454 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001455 seq = PySequence_Fast(value, "");
1456 if (!seq) {
1457 PyErr_Format(
1458 PyExc_TypeError,
1459 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1460 );
1461 return -1;
1462 }
1463 newlen = PySequence_Size(seq);
1464 }
1465
1466 if (step != 1 && newlen != slicelen)
1467 {
1468 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001469 "attempt to assign sequence of size %zd "
1470 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001471 newlen, slicelen
1472 );
1473 return -1;
1474 }
1475
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001476 /* Resize before creating the recycle bin, to prevent refleaks. */
1477 if (newlen > slicelen) {
1478 if (element_resize(self, newlen - slicelen) < 0) {
1479 if (seq) {
1480 Py_DECREF(seq);
1481 }
1482 return -1;
1483 }
1484 }
1485
1486 if (slicelen > 0) {
1487 /* to avoid recursive calls to this method (via decref), move
1488 old items to the recycle bin here, and get rid of them when
1489 we're done modifying the element */
1490 recycle = PyList_New(slicelen);
1491 if (!recycle) {
1492 if (seq) {
1493 Py_DECREF(seq);
1494 }
1495 return -1;
1496 }
1497 for (cur = start, i = 0; i < slicelen;
1498 cur += step, i++)
1499 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1500 }
1501
1502 if (newlen < slicelen) {
1503 /* delete slice */
1504 for (i = stop; i < self->extra->length; i++)
1505 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1506 } else if (newlen > slicelen) {
1507 /* insert slice */
1508 for (i = self->extra->length-1; i >= stop; i--)
1509 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1510 }
1511
1512 /* replace the slice */
1513 for (cur = start, i = 0; i < newlen;
1514 cur += step, i++) {
1515 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1516 Py_INCREF(element);
1517 self->extra->children[cur] = element;
1518 }
1519
1520 self->extra->length += newlen - slicelen;
1521
1522 if (seq) {
1523 Py_DECREF(seq);
1524 }
1525
1526 /* discard the recycle bin, and everything in it */
1527 Py_XDECREF(recycle);
1528
1529 return 0;
1530 }
1531 else {
1532 PyErr_SetString(PyExc_TypeError,
1533 "element indices must be integers");
1534 return -1;
1535 }
1536}
1537
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001538static PyMethodDef element_methods[] = {
1539
Eli Bendersky0192ba32012-03-30 16:38:33 +03001540 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001541
1542 {"get", (PyCFunction) element_get, METH_VARARGS},
1543 {"set", (PyCFunction) element_set, METH_VARARGS},
1544
1545 {"find", (PyCFunction) element_find, METH_VARARGS},
1546 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1547 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1548
1549 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001550 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001551 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1552 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1553
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001554 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1555 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1556 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1557
1558 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001559 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1560
1561 {"items", (PyCFunction) element_items, METH_VARARGS},
1562 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1563
1564 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1565
1566 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1567 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1568
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001569 {NULL, NULL}
1570};
1571
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001572static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001573element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001574{
1575 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001576 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001577
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001578 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001579 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001580
Alexander Belopolskye239d232010-12-08 23:31:48 +00001581 if (name == NULL)
1582 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001583
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001584 /* handle common attributes first */
1585 if (strcmp(name, "tag") == 0) {
1586 res = self->tag;
1587 Py_INCREF(res);
1588 return res;
1589 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001590 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001591 Py_INCREF(res);
1592 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001593 }
1594
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001595 /* methods */
1596 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1597 if (res)
1598 return res;
1599
1600 /* less common attributes */
1601 if (strcmp(name, "tail") == 0) {
1602 PyErr_Clear();
1603 res = element_get_tail(self);
1604 } else if (strcmp(name, "attrib") == 0) {
1605 PyErr_Clear();
1606 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001607 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001608 res = element_get_attrib(self);
1609 }
1610
1611 if (!res)
1612 return NULL;
1613
1614 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001615 return res;
1616}
1617
1618static int
1619element_setattr(ElementObject* self, const char* name, PyObject* value)
1620{
1621 if (value == NULL) {
1622 PyErr_SetString(
1623 PyExc_AttributeError,
1624 "can't delete element attributes"
1625 );
1626 return -1;
1627 }
1628
1629 if (strcmp(name, "tag") == 0) {
1630 Py_DECREF(self->tag);
1631 self->tag = value;
1632 Py_INCREF(self->tag);
1633 } else if (strcmp(name, "text") == 0) {
1634 Py_DECREF(JOIN_OBJ(self->text));
1635 self->text = value;
1636 Py_INCREF(self->text);
1637 } else if (strcmp(name, "tail") == 0) {
1638 Py_DECREF(JOIN_OBJ(self->tail));
1639 self->tail = value;
1640 Py_INCREF(self->tail);
1641 } else if (strcmp(name, "attrib") == 0) {
1642 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001643 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001644 Py_DECREF(self->extra->attrib);
1645 self->extra->attrib = value;
1646 Py_INCREF(self->extra->attrib);
1647 } else {
1648 PyErr_SetString(PyExc_AttributeError, name);
1649 return -1;
1650 }
1651
1652 return 0;
1653}
1654
1655static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001656 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001657 0, /* sq_concat */
1658 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001659 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001660 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001661 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001662 0,
1663};
1664
1665static PyMappingMethods element_as_mapping = {
1666 (lenfunc) element_length,
1667 (binaryfunc) element_subscr,
1668 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001669};
1670
Neal Norwitz227b5332006-03-22 09:28:35 +00001671static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001672 PyVarObject_HEAD_INIT(NULL, 0)
1673 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001674 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001675 (destructor)element_dealloc, /* tp_dealloc */
1676 0, /* tp_print */
1677 0, /* tp_getattr */
1678 (setattrfunc)element_setattr, /* tp_setattr */
1679 0, /* tp_reserved */
1680 (reprfunc)element_repr, /* tp_repr */
1681 0, /* tp_as_number */
1682 &element_as_sequence, /* tp_as_sequence */
1683 &element_as_mapping, /* tp_as_mapping */
1684 0, /* tp_hash */
1685 0, /* tp_call */
1686 0, /* tp_str */
1687 (getattrofunc)element_getattro, /* tp_getattro */
1688 0, /* tp_setattro */
1689 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001690 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1691 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001692 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001693 (traverseproc)element_gc_traverse, /* tp_traverse */
1694 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001695 0, /* tp_richcompare */
1696 0, /* tp_weaklistoffset */
1697 0, /* tp_iter */
1698 0, /* tp_iternext */
1699 element_methods, /* tp_methods */
1700 0, /* tp_members */
1701 0, /* tp_getset */
1702 0, /* tp_base */
1703 0, /* tp_dict */
1704 0, /* tp_descr_get */
1705 0, /* tp_descr_set */
1706 0, /* tp_dictoffset */
1707 (initproc)element_init, /* tp_init */
1708 PyType_GenericAlloc, /* tp_alloc */
1709 element_new, /* tp_new */
1710 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001711};
1712
1713/* ==================================================================== */
1714/* the tree builder type */
1715
1716typedef struct {
1717 PyObject_HEAD
1718
1719 PyObject* root; /* root node (first created node) */
1720
1721 ElementObject* this; /* current node */
1722 ElementObject* last; /* most recently created node */
1723
1724 PyObject* data; /* data collector (string or list), or NULL */
1725
1726 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001727 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001728
1729 /* element tracing */
1730 PyObject* events; /* list of events, or NULL if not collecting */
1731 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1732 PyObject* end_event_obj;
1733 PyObject* start_ns_event_obj;
1734 PyObject* end_ns_event_obj;
1735
1736} TreeBuilderObject;
1737
Neal Norwitz227b5332006-03-22 09:28:35 +00001738static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001739
Christian Heimes90aa7642007-12-19 02:45:37 +00001740#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001741
1742/* -------------------------------------------------------------------- */
1743/* constructor and destructor */
1744
1745LOCAL(PyObject*)
1746treebuilder_new(void)
1747{
1748 TreeBuilderObject* self;
1749
1750 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1751 if (self == NULL)
1752 return NULL;
1753
1754 self->root = NULL;
1755
1756 Py_INCREF(Py_None);
1757 self->this = (ElementObject*) Py_None;
1758
1759 Py_INCREF(Py_None);
1760 self->last = (ElementObject*) Py_None;
1761
1762 self->data = NULL;
1763
1764 self->stack = PyList_New(20);
1765 self->index = 0;
1766
1767 self->events = NULL;
1768 self->start_event_obj = self->end_event_obj = NULL;
1769 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1770
1771 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1772
1773 return (PyObject*) self;
1774}
1775
1776static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001777treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001778{
1779 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1780 return NULL;
1781
1782 return treebuilder_new();
1783}
1784
1785static void
1786treebuilder_dealloc(TreeBuilderObject* self)
1787{
1788 Py_XDECREF(self->end_ns_event_obj);
1789 Py_XDECREF(self->start_ns_event_obj);
1790 Py_XDECREF(self->end_event_obj);
1791 Py_XDECREF(self->start_event_obj);
1792 Py_XDECREF(self->events);
1793 Py_DECREF(self->stack);
1794 Py_XDECREF(self->data);
1795 Py_DECREF(self->last);
1796 Py_DECREF(self->this);
1797 Py_XDECREF(self->root);
1798
1799 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1800
1801 PyObject_Del(self);
1802}
1803
1804/* -------------------------------------------------------------------- */
1805/* handlers */
1806
1807LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001808treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1809 PyObject* attrib)
1810{
1811 PyObject* node;
1812 PyObject* this;
1813
1814 if (self->data) {
1815 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001816 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001817 self->last->text = JOIN_SET(
1818 self->data, PyList_CheckExact(self->data)
1819 );
1820 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001821 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001822 self->last->tail = JOIN_SET(
1823 self->data, PyList_CheckExact(self->data)
1824 );
1825 }
1826 self->data = NULL;
1827 }
1828
Eli Bendersky092af1f2012-03-04 07:14:03 +02001829 node = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001830 if (!node)
1831 return NULL;
1832
1833 this = (PyObject*) self->this;
1834
1835 if (this != Py_None) {
1836 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001837 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001838 } else {
1839 if (self->root) {
1840 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001841 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001842 "multiple elements on top level"
1843 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001844 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001845 }
1846 Py_INCREF(node);
1847 self->root = node;
1848 }
1849
1850 if (self->index < PyList_GET_SIZE(self->stack)) {
1851 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001852 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001853 Py_INCREF(this);
1854 } else {
1855 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001856 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001857 }
1858 self->index++;
1859
1860 Py_DECREF(this);
1861 Py_INCREF(node);
1862 self->this = (ElementObject*) node;
1863
1864 Py_DECREF(self->last);
1865 Py_INCREF(node);
1866 self->last = (ElementObject*) node;
1867
1868 if (self->start_event_obj) {
1869 PyObject* res;
1870 PyObject* action = self->start_event_obj;
1871 res = PyTuple_New(2);
1872 if (res) {
1873 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1874 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1875 PyList_Append(self->events, res);
1876 Py_DECREF(res);
1877 } else
1878 PyErr_Clear(); /* FIXME: propagate error */
1879 }
1880
1881 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001882
1883 error:
1884 Py_DECREF(node);
1885 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001886}
1887
1888LOCAL(PyObject*)
1889treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1890{
1891 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001892 if (self->last == (ElementObject*) Py_None) {
1893 /* ignore calls to data before the first call to start */
1894 Py_RETURN_NONE;
1895 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001896 /* store the first item as is */
1897 Py_INCREF(data); self->data = data;
1898 } else {
1899 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001900 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1901 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001902 /* expat often generates single character data sections; handle
1903 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001904 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1905 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001906 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001907 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001908 } else if (PyList_CheckExact(self->data)) {
1909 if (PyList_Append(self->data, data) < 0)
1910 return NULL;
1911 } else {
1912 PyObject* list = PyList_New(2);
1913 if (!list)
1914 return NULL;
1915 PyList_SET_ITEM(list, 0, self->data);
1916 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1917 self->data = list;
1918 }
1919 }
1920
1921 Py_RETURN_NONE;
1922}
1923
1924LOCAL(PyObject*)
1925treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1926{
1927 PyObject* item;
1928
1929 if (self->data) {
1930 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001931 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001932 self->last->text = JOIN_SET(
1933 self->data, PyList_CheckExact(self->data)
1934 );
1935 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001936 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001937 self->last->tail = JOIN_SET(
1938 self->data, PyList_CheckExact(self->data)
1939 );
1940 }
1941 self->data = NULL;
1942 }
1943
1944 if (self->index == 0) {
1945 PyErr_SetString(
1946 PyExc_IndexError,
1947 "pop from empty stack"
1948 );
1949 return NULL;
1950 }
1951
1952 self->index--;
1953
1954 item = PyList_GET_ITEM(self->stack, self->index);
1955 Py_INCREF(item);
1956
1957 Py_DECREF(self->last);
1958
1959 self->last = (ElementObject*) self->this;
1960 self->this = (ElementObject*) item;
1961
1962 if (self->end_event_obj) {
1963 PyObject* res;
1964 PyObject* action = self->end_event_obj;
1965 PyObject* node = (PyObject*) self->last;
1966 res = PyTuple_New(2);
1967 if (res) {
1968 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1969 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1970 PyList_Append(self->events, res);
1971 Py_DECREF(res);
1972 } else
1973 PyErr_Clear(); /* FIXME: propagate error */
1974 }
1975
1976 Py_INCREF(self->last);
1977 return (PyObject*) self->last;
1978}
1979
1980LOCAL(void)
1981treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001982 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001983{
1984 PyObject* res;
1985 PyObject* action;
1986 PyObject* parcel;
1987
1988 if (!self->events)
1989 return;
1990
1991 if (start) {
1992 if (!self->start_ns_event_obj)
1993 return;
1994 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001995 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001996 if (!parcel)
1997 return;
1998 Py_INCREF(action);
1999 } else {
2000 if (!self->end_ns_event_obj)
2001 return;
2002 action = self->end_ns_event_obj;
2003 Py_INCREF(action);
2004 parcel = Py_None;
2005 Py_INCREF(parcel);
2006 }
2007
2008 res = PyTuple_New(2);
2009
2010 if (res) {
2011 PyTuple_SET_ITEM(res, 0, action);
2012 PyTuple_SET_ITEM(res, 1, parcel);
2013 PyList_Append(self->events, res);
2014 Py_DECREF(res);
2015 } else
2016 PyErr_Clear(); /* FIXME: propagate error */
2017}
2018
2019/* -------------------------------------------------------------------- */
2020/* methods (in alphabetical order) */
2021
2022static PyObject*
2023treebuilder_data(TreeBuilderObject* self, PyObject* args)
2024{
2025 PyObject* data;
2026 if (!PyArg_ParseTuple(args, "O:data", &data))
2027 return NULL;
2028
2029 return treebuilder_handle_data(self, data);
2030}
2031
2032static PyObject*
2033treebuilder_end(TreeBuilderObject* self, PyObject* args)
2034{
2035 PyObject* tag;
2036 if (!PyArg_ParseTuple(args, "O:end", &tag))
2037 return NULL;
2038
2039 return treebuilder_handle_end(self, tag);
2040}
2041
2042LOCAL(PyObject*)
2043treebuilder_done(TreeBuilderObject* self)
2044{
2045 PyObject* res;
2046
2047 /* FIXME: check stack size? */
2048
2049 if (self->root)
2050 res = self->root;
2051 else
2052 res = Py_None;
2053
2054 Py_INCREF(res);
2055 return res;
2056}
2057
2058static PyObject*
2059treebuilder_close(TreeBuilderObject* self, PyObject* args)
2060{
2061 if (!PyArg_ParseTuple(args, ":close"))
2062 return NULL;
2063
2064 return treebuilder_done(self);
2065}
2066
2067static PyObject*
2068treebuilder_start(TreeBuilderObject* self, PyObject* args)
2069{
2070 PyObject* tag;
2071 PyObject* attrib = Py_None;
2072 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2073 return NULL;
2074
2075 return treebuilder_handle_start(self, tag, attrib);
2076}
2077
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002078static PyMethodDef treebuilder_methods[] = {
2079 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2080 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2081 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002082 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2083 {NULL, NULL}
2084};
2085
Neal Norwitz227b5332006-03-22 09:28:35 +00002086static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002087 PyVarObject_HEAD_INIT(NULL, 0)
2088 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002089 /* methods */
2090 (destructor)treebuilder_dealloc, /* tp_dealloc */
2091 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002092 0, /* tp_getattr */
2093 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002094 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002095 0, /* tp_repr */
2096 0, /* tp_as_number */
2097 0, /* tp_as_sequence */
2098 0, /* tp_as_mapping */
2099 0, /* tp_hash */
2100 0, /* tp_call */
2101 0, /* tp_str */
2102 0, /* tp_getattro */
2103 0, /* tp_setattro */
2104 0, /* tp_as_buffer */
2105 Py_TPFLAGS_DEFAULT, /* tp_flags */
2106 0, /* tp_doc */
2107 0, /* tp_traverse */
2108 0, /* tp_clear */
2109 0, /* tp_richcompare */
2110 0, /* tp_weaklistoffset */
2111 0, /* tp_iter */
2112 0, /* tp_iternext */
2113 treebuilder_methods, /* tp_methods */
2114 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002115};
2116
2117/* ==================================================================== */
2118/* the expat interface */
2119
2120#if defined(USE_EXPAT)
2121
2122#include "expat.h"
2123
2124#if defined(USE_PYEXPAT_CAPI)
2125#include "pyexpat.h"
2126static struct PyExpat_CAPI* expat_capi;
2127#define EXPAT(func) (expat_capi->func)
2128#else
2129#define EXPAT(func) (XML_##func)
2130#endif
2131
2132typedef struct {
2133 PyObject_HEAD
2134
2135 XML_Parser parser;
2136
2137 PyObject* target;
2138 PyObject* entity;
2139
2140 PyObject* names;
2141
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002142 PyObject* handle_start;
2143 PyObject* handle_data;
2144 PyObject* handle_end;
2145
2146 PyObject* handle_comment;
2147 PyObject* handle_pi;
2148
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002149 PyObject* handle_close;
2150
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002151} XMLParserObject;
2152
Neal Norwitz227b5332006-03-22 09:28:35 +00002153static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002154
2155/* helpers */
2156
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002157LOCAL(PyObject*)
2158makeuniversal(XMLParserObject* self, const char* string)
2159{
2160 /* convert a UTF-8 tag/attribute name from the expat parser
2161 to a universal name string */
2162
2163 int size = strlen(string);
2164 PyObject* key;
2165 PyObject* value;
2166
2167 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002168 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002169 if (!key)
2170 return NULL;
2171
2172 value = PyDict_GetItem(self->names, key);
2173
2174 if (value) {
2175 Py_INCREF(value);
2176 } else {
2177 /* new name. convert to universal name, and decode as
2178 necessary */
2179
2180 PyObject* tag;
2181 char* p;
2182 int i;
2183
2184 /* look for namespace separator */
2185 for (i = 0; i < size; i++)
2186 if (string[i] == '}')
2187 break;
2188 if (i != size) {
2189 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002190 tag = PyBytes_FromStringAndSize(NULL, size+1);
2191 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002192 p[0] = '{';
2193 memcpy(p+1, string, size);
2194 size++;
2195 } else {
2196 /* plain name; use key as tag */
2197 Py_INCREF(key);
2198 tag = key;
2199 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002201 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002202 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002203 value = PyUnicode_DecodeUTF8(p, size, "strict");
2204 Py_DECREF(tag);
2205 if (!value) {
2206 Py_DECREF(key);
2207 return NULL;
2208 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002209
2210 /* add to names dictionary */
2211 if (PyDict_SetItem(self->names, key, value) < 0) {
2212 Py_DECREF(key);
2213 Py_DECREF(value);
2214 return NULL;
2215 }
2216 }
2217
2218 Py_DECREF(key);
2219 return value;
2220}
2221
Eli Bendersky5b77d812012-03-16 08:20:05 +02002222/* Set the ParseError exception with the given parameters.
2223 * If message is not NULL, it's used as the error string. Otherwise, the
2224 * message string is the default for the given error_code.
2225*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002226static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002227expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002228{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002229 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002230
Victor Stinner499dfcf2011-03-21 13:26:24 +01002231 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002232 message ? message : EXPAT(ErrorString)(error_code),
2233 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002234 if (errmsg == NULL)
2235 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002236
Victor Stinner499dfcf2011-03-21 13:26:24 +01002237 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2238 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002239 if (!error)
2240 return;
2241
Eli Bendersky5b77d812012-03-16 08:20:05 +02002242 /* Add code and position attributes */
2243 code = PyLong_FromLong((long)error_code);
2244 if (!code) {
2245 Py_DECREF(error);
2246 return;
2247 }
2248 if (PyObject_SetAttrString(error, "code", code) == -1) {
2249 Py_DECREF(error);
2250 Py_DECREF(code);
2251 return;
2252 }
2253 Py_DECREF(code);
2254
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002255 position = Py_BuildValue("(ii)", line, column);
2256 if (!position) {
2257 Py_DECREF(error);
2258 return;
2259 }
2260 if (PyObject_SetAttrString(error, "position", position) == -1) {
2261 Py_DECREF(error);
2262 Py_DECREF(position);
2263 return;
2264 }
2265 Py_DECREF(position);
2266
2267 PyErr_SetObject(elementtree_parseerror_obj, error);
2268 Py_DECREF(error);
2269}
2270
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002271/* -------------------------------------------------------------------- */
2272/* handlers */
2273
2274static void
2275expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2276 int data_len)
2277{
2278 PyObject* key;
2279 PyObject* value;
2280 PyObject* res;
2281
2282 if (data_len < 2 || data_in[0] != '&')
2283 return;
2284
Neal Norwitz0269b912007-08-08 06:56:02 +00002285 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002286 if (!key)
2287 return;
2288
2289 value = PyDict_GetItem(self->entity, key);
2290
2291 if (value) {
2292 if (TreeBuilder_CheckExact(self->target))
2293 res = treebuilder_handle_data(
2294 (TreeBuilderObject*) self->target, value
2295 );
2296 else if (self->handle_data)
2297 res = PyObject_CallFunction(self->handle_data, "O", value);
2298 else
2299 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002300 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002301 } else if (!PyErr_Occurred()) {
2302 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002303 char message[128] = "undefined entity ";
2304 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002305 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002306 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002307 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002308 EXPAT(GetErrorColumnNumber)(self->parser),
2309 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002310 );
2311 }
2312
2313 Py_DECREF(key);
2314}
2315
2316static void
2317expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2318 const XML_Char **attrib_in)
2319{
2320 PyObject* res;
2321 PyObject* tag;
2322 PyObject* attrib;
2323 int ok;
2324
2325 /* tag name */
2326 tag = makeuniversal(self, tag_in);
2327 if (!tag)
2328 return; /* parser will look for errors */
2329
2330 /* attributes */
2331 if (attrib_in[0]) {
2332 attrib = PyDict_New();
2333 if (!attrib)
2334 return;
2335 while (attrib_in[0] && attrib_in[1]) {
2336 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002337 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002338 if (!key || !value) {
2339 Py_XDECREF(value);
2340 Py_XDECREF(key);
2341 Py_DECREF(attrib);
2342 return;
2343 }
2344 ok = PyDict_SetItem(attrib, key, value);
2345 Py_DECREF(value);
2346 Py_DECREF(key);
2347 if (ok < 0) {
2348 Py_DECREF(attrib);
2349 return;
2350 }
2351 attrib_in += 2;
2352 }
2353 } else {
2354 Py_INCREF(Py_None);
2355 attrib = Py_None;
2356 }
2357
2358 if (TreeBuilder_CheckExact(self->target))
2359 /* shortcut */
2360 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2361 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002362 else if (self->handle_start) {
2363 if (attrib == Py_None) {
2364 Py_DECREF(attrib);
2365 attrib = PyDict_New();
2366 if (!attrib)
2367 return;
2368 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002369 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002370 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002371 res = NULL;
2372
2373 Py_DECREF(tag);
2374 Py_DECREF(attrib);
2375
2376 Py_XDECREF(res);
2377}
2378
2379static void
2380expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2381 int data_len)
2382{
2383 PyObject* data;
2384 PyObject* res;
2385
Neal Norwitz0269b912007-08-08 06:56:02 +00002386 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002387 if (!data)
2388 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002389
2390 if (TreeBuilder_CheckExact(self->target))
2391 /* shortcut */
2392 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2393 else if (self->handle_data)
2394 res = PyObject_CallFunction(self->handle_data, "O", data);
2395 else
2396 res = NULL;
2397
2398 Py_DECREF(data);
2399
2400 Py_XDECREF(res);
2401}
2402
2403static void
2404expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2405{
2406 PyObject* tag;
2407 PyObject* res = NULL;
2408
2409 if (TreeBuilder_CheckExact(self->target))
2410 /* shortcut */
2411 /* the standard tree builder doesn't look at the end tag */
2412 res = treebuilder_handle_end(
2413 (TreeBuilderObject*) self->target, Py_None
2414 );
2415 else if (self->handle_end) {
2416 tag = makeuniversal(self, tag_in);
2417 if (tag) {
2418 res = PyObject_CallFunction(self->handle_end, "O", tag);
2419 Py_DECREF(tag);
2420 }
2421 }
2422
2423 Py_XDECREF(res);
2424}
2425
2426static void
2427expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2428 const XML_Char *uri)
2429{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002430 PyObject* sprefix = NULL;
2431 PyObject* suri = NULL;
2432
2433 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2434 if (!suri)
2435 return;
2436
2437 if (prefix)
2438 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2439 else
2440 sprefix = PyUnicode_FromString("");
2441 if (!sprefix) {
2442 Py_DECREF(suri);
2443 return;
2444 }
2445
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002446 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002447 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002448 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002449
2450 Py_DECREF(sprefix);
2451 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002452}
2453
2454static void
2455expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2456{
2457 treebuilder_handle_namespace(
2458 (TreeBuilderObject*) self->target, 0, NULL, NULL
2459 );
2460}
2461
2462static void
2463expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2464{
2465 PyObject* comment;
2466 PyObject* res;
2467
2468 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002469 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002470 if (comment) {
2471 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2472 Py_XDECREF(res);
2473 Py_DECREF(comment);
2474 }
2475 }
2476}
2477
2478static void
2479expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2480 const XML_Char* data_in)
2481{
2482 PyObject* target;
2483 PyObject* data;
2484 PyObject* res;
2485
2486 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002487 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2488 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002489 if (target && data) {
2490 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2491 Py_XDECREF(res);
2492 Py_DECREF(data);
2493 Py_DECREF(target);
2494 } else {
2495 Py_XDECREF(data);
2496 Py_XDECREF(target);
2497 }
2498 }
2499}
2500
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002501static int
2502expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2503 XML_Encoding *info)
2504{
2505 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002506 unsigned char s[256];
2507 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002508 void *data;
2509 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002510
2511 memset(info, 0, sizeof(XML_Encoding));
2512
2513 for (i = 0; i < 256; i++)
2514 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002515
Fredrik Lundhc3389992005-12-25 11:40:19 +00002516 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002517 if (!u)
2518 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002519 if (PyUnicode_READY(u))
2520 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002522 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002523 Py_DECREF(u);
2524 return XML_STATUS_ERROR;
2525 }
2526
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002527 kind = PyUnicode_KIND(u);
2528 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002529 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002530 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2531 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2532 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002533 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002534 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002535 }
2536
2537 Py_DECREF(u);
2538
2539 return XML_STATUS_OK;
2540}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002541
2542/* -------------------------------------------------------------------- */
2543/* constructor and destructor */
2544
2545static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002546xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002547{
2548 XMLParserObject* self;
2549 /* FIXME: does this need to be static? */
2550 static XML_Memory_Handling_Suite memory_handler;
2551
2552 PyObject* target = NULL;
2553 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002554 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002555 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2556 &target, &encoding))
2557 return NULL;
2558
2559#if defined(USE_PYEXPAT_CAPI)
2560 if (!expat_capi) {
2561 PyErr_SetString(
2562 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2563 );
2564 return NULL;
2565 }
2566#endif
2567
2568 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2569 if (self == NULL)
2570 return NULL;
2571
2572 self->entity = PyDict_New();
2573 if (!self->entity) {
2574 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002575 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002576 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002577
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002578 self->names = PyDict_New();
2579 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002580 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002581 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002582 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002583 }
2584
2585 memory_handler.malloc_fcn = PyObject_Malloc;
2586 memory_handler.realloc_fcn = PyObject_Realloc;
2587 memory_handler.free_fcn = PyObject_Free;
2588
2589 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2590 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002591 PyObject_Del(self->names);
2592 PyObject_Del(self->entity);
2593 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002594 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002595 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002596 }
2597
2598 /* setup target handlers */
2599 if (!target) {
2600 target = treebuilder_new();
2601 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002602 EXPAT(ParserFree)(self->parser);
2603 PyObject_Del(self->names);
2604 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002605 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002606 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002607 }
2608 } else
2609 Py_INCREF(target);
2610 self->target = target;
2611
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002612 self->handle_start = PyObject_GetAttrString(target, "start");
2613 self->handle_data = PyObject_GetAttrString(target, "data");
2614 self->handle_end = PyObject_GetAttrString(target, "end");
2615 self->handle_comment = PyObject_GetAttrString(target, "comment");
2616 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002617 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002618
2619 PyErr_Clear();
2620
2621 /* configure parser */
2622 EXPAT(SetUserData)(self->parser, self);
2623 EXPAT(SetElementHandler)(
2624 self->parser,
2625 (XML_StartElementHandler) expat_start_handler,
2626 (XML_EndElementHandler) expat_end_handler
2627 );
2628 EXPAT(SetDefaultHandlerExpand)(
2629 self->parser,
2630 (XML_DefaultHandler) expat_default_handler
2631 );
2632 EXPAT(SetCharacterDataHandler)(
2633 self->parser,
2634 (XML_CharacterDataHandler) expat_data_handler
2635 );
2636 if (self->handle_comment)
2637 EXPAT(SetCommentHandler)(
2638 self->parser,
2639 (XML_CommentHandler) expat_comment_handler
2640 );
2641 if (self->handle_pi)
2642 EXPAT(SetProcessingInstructionHandler)(
2643 self->parser,
2644 (XML_ProcessingInstructionHandler) expat_pi_handler
2645 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002646 EXPAT(SetUnknownEncodingHandler)(
2647 self->parser,
2648 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2649 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002650
2651 ALLOC(sizeof(XMLParserObject), "create expatparser");
2652
2653 return (PyObject*) self;
2654}
2655
2656static void
2657xmlparser_dealloc(XMLParserObject* self)
2658{
2659 EXPAT(ParserFree)(self->parser);
2660
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002661 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002662 Py_XDECREF(self->handle_pi);
2663 Py_XDECREF(self->handle_comment);
2664 Py_XDECREF(self->handle_end);
2665 Py_XDECREF(self->handle_data);
2666 Py_XDECREF(self->handle_start);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002667
2668 Py_DECREF(self->target);
2669 Py_DECREF(self->entity);
2670 Py_DECREF(self->names);
2671
2672 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2673
2674 PyObject_Del(self);
2675}
2676
2677/* -------------------------------------------------------------------- */
2678/* methods (in alphabetical order) */
2679
2680LOCAL(PyObject*)
2681expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2682{
2683 int ok;
2684
2685 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2686
2687 if (PyErr_Occurred())
2688 return NULL;
2689
2690 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002691 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002692 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002693 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002694 EXPAT(GetErrorColumnNumber)(self->parser),
2695 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002696 );
2697 return NULL;
2698 }
2699
2700 Py_RETURN_NONE;
2701}
2702
2703static PyObject*
2704xmlparser_close(XMLParserObject* self, PyObject* args)
2705{
2706 /* end feeding data to parser */
2707
2708 PyObject* res;
2709 if (!PyArg_ParseTuple(args, ":close"))
2710 return NULL;
2711
2712 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002713 if (!res)
2714 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002715
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002716 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002717 Py_DECREF(res);
2718 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002719 } if (self->handle_close) {
2720 Py_DECREF(res);
2721 return PyObject_CallFunction(self->handle_close, "");
2722 } else
2723 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002724}
2725
2726static PyObject*
2727xmlparser_feed(XMLParserObject* self, PyObject* args)
2728{
2729 /* feed data to parser */
2730
2731 char* data;
2732 int data_len;
2733 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2734 return NULL;
2735
2736 return expat_parse(self, data, data_len, 0);
2737}
2738
2739static PyObject*
2740xmlparser_parse(XMLParserObject* self, PyObject* args)
2741{
2742 /* (internal) parse until end of input stream */
2743
2744 PyObject* reader;
2745 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02002746 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002747 PyObject* res;
2748
2749 PyObject* fileobj;
2750 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2751 return NULL;
2752
2753 reader = PyObject_GetAttrString(fileobj, "read");
2754 if (!reader)
2755 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002756
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002757 /* read from open file object */
2758 for (;;) {
2759
2760 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2761
2762 if (!buffer) {
2763 /* read failed (e.g. due to KeyboardInterrupt) */
2764 Py_DECREF(reader);
2765 return NULL;
2766 }
2767
Eli Benderskyf996e772012-03-16 05:53:30 +02002768 if (PyUnicode_CheckExact(buffer)) {
2769 /* A unicode object is encoded into bytes using UTF-8 */
2770 if (PyUnicode_GET_SIZE(buffer) == 0) {
2771 Py_DECREF(buffer);
2772 break;
2773 }
2774 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
2775 if (!temp) {
2776 /* Propagate exception from PyUnicode_AsEncodedString */
2777 Py_DECREF(buffer);
2778 Py_DECREF(reader);
2779 return NULL;
2780 }
2781
2782 /* Here we no longer need the original buffer since it contains
2783 * unicode. Make it point to the encoded bytes object.
2784 */
2785 Py_DECREF(buffer);
2786 buffer = temp;
2787 }
2788 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002789 Py_DECREF(buffer);
2790 break;
2791 }
2792
2793 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002794 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002795 );
2796
2797 Py_DECREF(buffer);
2798
2799 if (!res) {
2800 Py_DECREF(reader);
2801 return NULL;
2802 }
2803 Py_DECREF(res);
2804
2805 }
2806
2807 Py_DECREF(reader);
2808
2809 res = expat_parse(self, "", 0, 1);
2810
2811 if (res && TreeBuilder_CheckExact(self->target)) {
2812 Py_DECREF(res);
2813 return treebuilder_done((TreeBuilderObject*) self->target);
2814 }
2815
2816 return res;
2817}
2818
2819static PyObject*
2820xmlparser_setevents(XMLParserObject* self, PyObject* args)
2821{
2822 /* activate element event reporting */
2823
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002824 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002825 TreeBuilderObject* target;
2826
2827 PyObject* events; /* event collector */
2828 PyObject* event_set = Py_None;
2829 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2830 &event_set))
2831 return NULL;
2832
2833 if (!TreeBuilder_CheckExact(self->target)) {
2834 PyErr_SetString(
2835 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01002836 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002837 "targets"
2838 );
2839 return NULL;
2840 }
2841
2842 target = (TreeBuilderObject*) self->target;
2843
2844 Py_INCREF(events);
2845 Py_XDECREF(target->events);
2846 target->events = events;
2847
2848 /* clear out existing events */
2849 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2850 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2851 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2852 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2853
2854 if (event_set == Py_None) {
2855 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002856 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002857 Py_RETURN_NONE;
2858 }
2859
2860 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2861 goto error;
2862
2863 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2864 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2865 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002866 if (PyUnicode_Check(item)) {
2867 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002868 if (event == NULL)
2869 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002870 } else if (PyBytes_Check(item))
2871 event = PyBytes_AS_STRING(item);
2872 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002873 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002874 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002875 if (strcmp(event, "start") == 0) {
2876 Py_INCREF(item);
2877 target->start_event_obj = item;
2878 } else if (strcmp(event, "end") == 0) {
2879 Py_INCREF(item);
2880 Py_XDECREF(target->end_event_obj);
2881 target->end_event_obj = item;
2882 } else if (strcmp(event, "start-ns") == 0) {
2883 Py_INCREF(item);
2884 Py_XDECREF(target->start_ns_event_obj);
2885 target->start_ns_event_obj = item;
2886 EXPAT(SetNamespaceDeclHandler)(
2887 self->parser,
2888 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2889 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2890 );
2891 } else if (strcmp(event, "end-ns") == 0) {
2892 Py_INCREF(item);
2893 Py_XDECREF(target->end_ns_event_obj);
2894 target->end_ns_event_obj = item;
2895 EXPAT(SetNamespaceDeclHandler)(
2896 self->parser,
2897 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2898 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2899 );
2900 } else {
2901 PyErr_Format(
2902 PyExc_ValueError,
2903 "unknown event '%s'", event
2904 );
2905 return NULL;
2906 }
2907 }
2908
2909 Py_RETURN_NONE;
2910
2911 error:
2912 PyErr_SetString(
2913 PyExc_TypeError,
2914 "invalid event tuple"
2915 );
2916 return NULL;
2917}
2918
2919static PyMethodDef xmlparser_methods[] = {
2920 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2921 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2922 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2923 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2924 {NULL, NULL}
2925};
2926
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002927static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002928xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002929{
Alexander Belopolskye239d232010-12-08 23:31:48 +00002930 if (PyUnicode_Check(nameobj)) {
2931 PyObject* res;
2932 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
2933 res = self->entity;
2934 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
2935 res = self->target;
2936 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
2937 return PyUnicode_FromFormat(
2938 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002939 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00002940 }
2941 else
2942 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002943
Alexander Belopolskye239d232010-12-08 23:31:48 +00002944 Py_INCREF(res);
2945 return res;
2946 }
2947 generic:
2948 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002949}
2950
Neal Norwitz227b5332006-03-22 09:28:35 +00002951static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002952 PyVarObject_HEAD_INIT(NULL, 0)
2953 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002954 /* methods */
2955 (destructor)xmlparser_dealloc, /* tp_dealloc */
2956 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002957 0, /* tp_getattr */
2958 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002959 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002960 0, /* tp_repr */
2961 0, /* tp_as_number */
2962 0, /* tp_as_sequence */
2963 0, /* tp_as_mapping */
2964 0, /* tp_hash */
2965 0, /* tp_call */
2966 0, /* tp_str */
2967 (getattrofunc)xmlparser_getattro, /* tp_getattro */
2968 0, /* tp_setattro */
2969 0, /* tp_as_buffer */
2970 Py_TPFLAGS_DEFAULT, /* tp_flags */
2971 0, /* tp_doc */
2972 0, /* tp_traverse */
2973 0, /* tp_clear */
2974 0, /* tp_richcompare */
2975 0, /* tp_weaklistoffset */
2976 0, /* tp_iter */
2977 0, /* tp_iternext */
2978 xmlparser_methods, /* tp_methods */
2979 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002980};
2981
2982#endif
2983
/* ==================================================================== */
/* python module interface */
2986
2987static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002988 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2989 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2990#if defined(USE_EXPAT)
2991 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002992#endif
2993 {NULL, NULL}
2994};
2995
Martin v. Löwis1a214512008-06-11 05:26:20 +00002996
2997static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002998 PyModuleDef_HEAD_INIT,
2999 "_elementtree",
3000 NULL,
3001 -1,
3002 _functions,
3003 NULL,
3004 NULL,
3005 NULL,
3006 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003007};
3008
Neal Norwitzf6657e62006-12-28 04:47:50 +00003009PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003010PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003011{
3012 PyObject* m;
3013 PyObject* g;
3014 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003015
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003016 /* Initialize object types */
3017 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003018 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003019 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003020 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003021#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003022 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003023 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003024#endif
3025
Martin v. Löwis1a214512008-06-11 05:26:20 +00003026 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003027 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003028 return NULL;
3029
3030 /* The code below requires that the module gets already added
3031 to sys.modules. */
3032 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003033 _elementtreemodule.m_name,
3034 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003035
3036 /* python glue code */
3037
3038 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003039 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003040 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003041
3042 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
3043
3044 bootstrap = (
3045
Florent Xiclunaf4bdf4e2012-02-11 11:28:16 +01003046 "from copy import deepcopy\n"
3047 "from xml.etree import ElementPath\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003048
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003049 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003050 " if tag == '*':\n"
3051 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003052 " if tag is None or node.tag == tag:\n"
3053 " yield node\n"
3054 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003055 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003056 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003057
3058 "def itertext(node):\n" /* helper */
3059 " if node.text:\n"
3060 " yield node.text\n"
3061 " for e in node:\n"
3062 " for s in e.itertext():\n"
3063 " yield s\n"
3064 " if e.tail:\n"
3065 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003066
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003067 );
3068
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003069 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3070 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003071
3072 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003073 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003074 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3075 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076
3077#if defined(USE_PYEXPAT_CAPI)
3078 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003079 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3080 if (expat_capi) {
3081 /* check that it's usable */
3082 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3083 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3084 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3085 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3086 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3087 expat_capi = NULL;
3088 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003089#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003090
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003091 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003092 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003093 );
3094 Py_INCREF(elementtree_parseerror_obj);
3095 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3096
Eli Bendersky092af1f2012-03-04 07:14:03 +02003097 Py_INCREF((PyObject *)&Element_Type);
3098 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3099
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003100 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003101}